diff --git a/.gitignore b/.gitignore index ef9f13c..2744889 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ build .vscode -unit_tests_backend *.txt python_lib/dl_lib/_compiled -*__pycache__* \ No newline at end of file +*__pycache__* +*_cache + +# TODO: remove later +benchmarks \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index b09189f..e317ea5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ endif() add_compile_options("$<$:/utf-8>") add_compile_options("$<$:/utf-8>") -option(DOUBLE_PRECISION "Compile with double precision at cost of speed?" OFF) # TODO: not implemented yet +# TODO: add flag for double precision? # include python libs if(APPLE) @@ -40,7 +40,7 @@ if(APPLE) message("Python_INCLUDE_DIRS:${${Python_INCLUDE_DIRS}}") set(PYTHON_LIBRARIES ${Python_LIBRARIES}) - string(COMPARE EQUAL "${PYTHON_LIBRARIES}" "" PYTHONLIBS_EMPTY) + string(COMPARE EQUAL "${Python_LIBRARIES}" "" PYTHONLIBS_EMPTY) if(PYTHONLIBS_EMPTY) message(FATAL_ERROR "Problem: PYTHON_LIBRARIES not found. Do you have Python installed on your machine?") endif() @@ -50,19 +50,18 @@ if(APPLE) message("Failed to automatically find Python_INCLUDE_DIRS. Setting the PYTHON_INCLUDE_DIRS variable manually. If this crashes please adjust the following path to the path where Python.h resides (the one matching the found Python instance). 
Paths must be consistent iff multiple Python versions on machine.") set(PYTHON_H_PATH "/usr/local/opt/python@3.13/Frameworks/Python.framework/Versions/3.13/include/python3.13") - set(PYTHON_INCLUDE_DIRS "${PYTHON_H_PATH}") + set(PYTHON_INCLUDE_DIRS "${Python_H_PATH}") else() set(PYTHON_INCLUDE_DIRS ${Python_INCLUDE_DIRS}) endif() #FindPython3() - message("Apple - Using Python:${Python_VERSION_MAJOR} - Libraries:${PYTHON_LIBRARIES} - IncludeDirs: ${PYTHON_INCLUDE_DIRS}") + message("Apple - Using Python:${Python_VERSION_MAJOR} - Libraries:${Python_LIBRARIES} - IncludeDirs: ${Python_INCLUDE_DIRS}") else() message("Getting PythonLibs on Linux or Windows path") - find_package(PythonLibs REQUIRED) + find_package(Python 3 REQUIRED COMPONENTS Interpreter Development) endif() -include_directories(${PYTHON_INCLUDE_DIRS}) -message("Using Python:${Python_VERSION_MAJOR} - Libraries:${PYTHON_LIBRARIES} - IncludeDirs: ${PYTHON_INCLUDE_DIRS}") - +include_directories(${Python_INCLUDE_DIRS}) +message("Using Python:${Python_VERSION_MAJOR} - Libraries:${Python_LIBRARIES} - IncludeDirs: ${Python_INCLUDE_DIRS}") #set(CMAKE_MESSAGE_LOG_LEVEL WARNING) @@ -108,4 +107,9 @@ option(BUILD_TESTS "Build tests" OFF) if(BUILD_TESTS) enable_testing() add_subdirectory(tests) +endif() + +option(BUILD_BENCHMARKS "Build benchmarks" OFF) +if(BUILD_BENCHMARKS) + add_subdirectory(benchmarks) endif() \ No newline at end of file diff --git a/python_lib/dl_lib/__init__.py b/python_lib/dl_lib/__init__.py index 9817df9..e7f6844 100644 --- a/python_lib/dl_lib/__init__.py +++ b/python_lib/dl_lib/__init__.py @@ -1,3 +1,5 @@ -from ._compiled._core import Tensor, Dimension, Device, Ones, Zeros, Gaussian +from ._compiled._core import Tensor, Dimension, Device -__all__ = ['Tensor', 'Device', 'Dimension'] \ No newline at end of file +__all__ = ['Tensor', 'Device', 'Dimension'] + +__version__ = "0.2.0" \ No newline at end of file diff --git a/python_lib/dl_lib/nn/__init__.py b/python_lib/dl_lib/nn/__init__.py index 
75fefbc..774cbe0 100644 --- a/python_lib/dl_lib/nn/__init__.py +++ b/python_lib/dl_lib/nn/__init__.py @@ -1,4 +1,5 @@ -#from .._compiled._layers import FfLayer, ReLU +from .module import Module, Sequential +from dl_lib._compiled._nn import FfLayer #from .._compiled._core import Tensor # re-export if needed -#__all__ = ['FfLayer', 'ReLU'] \ No newline at end of file +__all__ = ['Module', 'Sequential', 'FfLayer'] \ No newline at end of file diff --git a/python_lib/dl_lib/nn/activation/__init__.py b/python_lib/dl_lib/nn/activation/__init__.py new file mode 100644 index 0000000..0ab1bab --- /dev/null +++ b/python_lib/dl_lib/nn/activation/__init__.py @@ -0,0 +1,4 @@ +from dl_lib._compiled._nn import ReLU, LeakyReLU, Softmax +#from .._compiled._core import Tensor # re-export if needed + +__all__ = ['ReLU', 'LeakyReLU', 'Softmax'] \ No newline at end of file diff --git a/python_lib/dl_lib/nn/module.py b/python_lib/dl_lib/nn/module.py new file mode 100644 index 0000000..d4a232a --- /dev/null +++ b/python_lib/dl_lib/nn/module.py @@ -0,0 +1,59 @@ +""" +Module base class. We use it to automatically register network +modules when defining graphs via Module. +""" + +from .._compiled._nn import _Module + +class Module(_Module): + def __init__(self): + object.__setattr__(self, "_modules", {}) # not necessary, but more explicit + self._modules = {} + + """ + Stores attributes defined in __init__ in private + _modules dictionary + """ + def __setattr__(self, name, value): + if isinstance(value, Module): + self._modules[name] = value + object.__setattr__(self, name, value) + + """ + Returns a list of leaf parameters. Used to identify trainable + nodes of a graph. + """ + def parameters(self): + params = self._own_parameters() # calls C++ side for leaf modules + for module in self._modules.values(): + params.extend(module.parameters()) + return params + +""" +For convenience. 
+""" +class Sequential(Module): + def __init__(self): + super().__init__() + object.__setattr__(self, "_layers", []) + + def append(self, module): + self._layers.append(module) + + def forward(self, x): + for layer in self._layers: + x = layer(x) + return x + + def parameters(self): + params = [] + for layer in self._layers: + if hasattr(layer, 'parameters'): + result = layer.parameters() + if isinstance(result, list): + params.extend(result) + else: + params.extend(list(result)) # force conversion from BP proxy + elif hasattr(layer, 'params'): + params.extend(list(layer.params)) + return params \ No newline at end of file diff --git a/python_lib/dl_lib/sys/__init__.py b/python_lib/dl_lib/sys/__init__.py new file mode 100644 index 0000000..b21f145 --- /dev/null +++ b/python_lib/dl_lib/sys/__init__.py @@ -0,0 +1 @@ +from dl_lib._compiled._sys import getDevice, setDevice, setSeed \ No newline at end of file diff --git a/python_lib/dl_lib/train/__init__.py b/python_lib/dl_lib/train/__init__.py new file mode 100644 index 0000000..9614d89 --- /dev/null +++ b/python_lib/dl_lib/train/__init__.py @@ -0,0 +1,4 @@ +from dl_lib._compiled._train import TrainLoop +#from dl_lib._compiled._core import Tensor # re-export if needed + +__all__ = ['TrainLoop'] \ No newline at end of file diff --git a/python_lib/dl_lib/train/loss/__init__.py b/python_lib/dl_lib/train/loss/__init__.py new file mode 100644 index 0000000..c9d5b4f --- /dev/null +++ b/python_lib/dl_lib/train/loss/__init__.py @@ -0,0 +1,4 @@ +from dl_lib._compiled._train import BCE, BceWithSigmoid, CrossEntropy, CrossEntropyWithSoftmax +#from dl_lib._compiled._core import Tensor # re-export if needed + +__all__ = ['BCE', 'BceWithSigmoid', 'CrossEntropy', 'CrossEntropyWithSoftmax'] \ No newline at end of file diff --git a/python_lib/dl_lib/train/optim/__init__.py b/python_lib/dl_lib/train/optim/__init__.py new file mode 100644 index 0000000..a6669c6 --- /dev/null +++ b/python_lib/dl_lib/train/optim/__init__.py @@ -0,0 +1,4 
@@ +from dl_lib._compiled._train import SGD, RmsProp +#from dl_lib._compiled._core import Tensor # re-export if needed + +__all__ = ['SGD', 'RmsProp'] \ No newline at end of file diff --git a/readme.md b/readme.md index 8c531f0..2eeaf95 100644 --- a/readme.md +++ b/readme.md @@ -34,10 +34,10 @@ For some examples on Python interface, see tests/python. 🚧 **Work in Progress** - Implementing additional layers and optimizations Roadmap: -- [ ] Python Binding Unit Tests -- [ ] Additional layer types (Conv2D, LSTM, etc.) +- [x] Python Binding Unit Tests +- [ ] Additional layer types (Conv2D, Dropout, etc.) - [ ] Optimizers and training framework -- [ ] CUDA kernels for performance-critical operations +- [ ] CUDA mode for operations - [ ] AlexNet reference implementation - [ ] Docker deployment example @@ -62,11 +62,12 @@ ctest ## Required -- Compiler capable of C++20 at least (we test with gcc 12.3.0) +- Compiler capable of C++23 at least (we test with gcc 13.3.0) - Boost Python -- Cmake > 3.24 +- Cmake > 3.28 - Python 3 (we test with 3.10, but it should work with any version) -- pytest for unit tests (we use 9.0.2) +- pytest and GTest for unit tests (we use pytest=9.0.2) +- Google Benchmark for benchmarking ## Troubleshooting diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0d564e2..d47e4d1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,17 +2,38 @@ add_subdirectory(backend) add_subdirectory(python) -target_link_libraries(_core +target_link_libraries(_core PRIVATE ${Boost_LIBRARIES} - ${PYTHON_LIBRARIES} + ${Python_LIBRARIES} BackendCore) target_include_directories(_core PRIVATE + ${Python_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS}) + +target_link_libraries(_nn PRIVATE + ${Boost_LIBRARIES} + ${PYTHON_LIBRARIES} + BackendCore) + +target_include_directories(_nn PRIVATE + ${PYTHON_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS}) + +target_link_libraries(_sys PRIVATE + ${Boost_LIBRARIES} + ${PYTHON_LIBRARIES} + BackendCore) + +target_include_directories(_sys 
PRIVATE ${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) -#target_link_libraries(py_layers PRIVATE ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} BackendCore) -#target_include_directories(py_layers PRIVATE ${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) +target_link_libraries(_train PRIVATE + ${Boost_LIBRARIES} + ${PYTHON_LIBRARIES} + BackendCore) -# for compiled boost lib -#target_link_libraries(hello PRIVATE Boost::filesystem) \ No newline at end of file +target_include_directories(_train PRIVATE + ${PYTHON_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS}) \ No newline at end of file diff --git a/src/backend/CMakeLists.txt b/src/backend/CMakeLists.txt index 579fae3..ed6bade 100644 --- a/src/backend/CMakeLists.txt +++ b/src/backend/CMakeLists.txt @@ -1,25 +1,18 @@ -#include_directories( -# "${CMAKE_CURRENT_SOURCE_DIR}/computational_graph" -# "${CMAKE_CURRENT_SOURCE_DIR}/data_modeling" -# "${CMAKE_CURRENT_SOURCE_DIR}/utility" -# "${CMAKE_CURRENT_SOURCE_DIR}/layers" -# "${CMAKE_CURRENT_SOURCE_DIR}/system" -# "${CMAKE_CURRENT_SOURCE_DIR}/training/loss_functions" -# "${CMAKE_CURRENT_SOURCE_DIR}/training/optimizers" -# ) - file(GLOB_RECURSE CORE_SOURCES computational_graph/*.cpp data_modeling/*.cpp - #layers/*.cpp - #networks/*.cpp - #training/*.cpp - utility/*.cpp + module/*.cpp system/*.cpp + training/*.cpp + utility/*.cpp ) -add_library(BackendCore STATIC ${CORE_SOURCES}) +add_library(BackendCore SHARED ${CORE_SOURCES}) target_include_directories(BackendCore PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} +) + +set_target_properties(BackendCore PROPERTIES + LIBRARY_OUTPUT_DIRECTORY "${PYTHON_MODULE_DIR}" # make sure Python-modules see backend ) \ No newline at end of file diff --git a/src/backend/computational_graph/activation_functions/leaky_relu_node.cpp b/src/backend/computational_graph/activation_functions/leaky_relu_node.cpp new file mode 100644 index 0000000..83de4ca --- /dev/null +++ b/src/backend/computational_graph/activation_functions/leaky_relu_node.cpp @@ -0,0 +1,31 @@ +/** + * @file 
leaky_relu_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "leaky_relu_node.h" + +#include + +using namespace std; +using namespace cgraph; + +vector> LeakyReLuNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + constexpr ftype zero = 0.0; + + auto res = make_shared(upstreamGrad.getDims(), upstreamGrad.getDevice(), false); + + const auto& parent = parents[0]; + for(tensorSize_t i=0; iset((*parent)[i] > zero ? upstreamGrad[i] : upstreamGrad[i] * eps, i); + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/activation_functions/leaky_relu_node.h b/src/backend/computational_graph/activation_functions/leaky_relu_node.h new file mode 100644 index 0000000..1d9304e --- /dev/null +++ b/src/backend/computational_graph/activation_functions/leaky_relu_node.h @@ -0,0 +1,29 @@ +/** + * @file leaky_relu_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" + +#include + +namespace cgraph { + class LeakyReLuNode final : public GraphNode { + private: + const ftype eps; + + public: + explicit LeakyReLuNode(std::shared_ptr t, const ftype eps) + : GraphNode({std::move(t)}), eps{eps} {} + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} diff --git a/src/backend/computational_graph/relu_node.cpp b/src/backend/computational_graph/activation_functions/relu_node.cpp similarity index 66% rename from src/backend/computational_graph/relu_node.cpp rename to src/backend/computational_graph/activation_functions/relu_node.cpp index 2e0f647..3fcc958 100644 --- a/src/backend/computational_graph/relu_node.cpp +++ b/src/backend/computational_graph/activation_functions/relu_node.cpp @@ -14,17 +14,18 @@ 
#include using namespace std; -using namespace graph; +using namespace cgraph; vector> ReLuNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); - constexpr ftype zero = 0.0; - auto res = make_shared(upstreamGrad.getDims().toVector(), upstreamGrad.getDevice(), false); + auto res = make_shared(upstreamGrad.getDims(), upstreamGrad.getDevice(), false); + + const auto& parent = parents[0]; for(tensorSize_t i=0; isetItem(v > zero ? v : zero, i); + res->set((*parent)[i] > zero ? upstreamGrad[i] : zero, i); } - return {std::move(res)}; + + return {res}; } \ No newline at end of file diff --git a/src/backend/computational_graph/relu_node.h b/src/backend/computational_graph/activation_functions/relu_node.h similarity index 61% rename from src/backend/computational_graph/relu_node.h rename to src/backend/computational_graph/activation_functions/relu_node.h index b0ce5b8..ef17749 100644 --- a/src/backend/computational_graph/relu_node.h +++ b/src/backend/computational_graph/activation_functions/relu_node.h @@ -11,24 +11,16 @@ #pragma once -#include "graph_node.h" +#include "computational_graph/graph_node.h" #include -namespace graph { +namespace cgraph { class ReLuNode final : public GraphNode { public: explicit ReLuNode(std::shared_ptr t) : GraphNode({std::move(t)}) {} - ReLuNode(const ReLuNode& other) = delete; - ReLuNode& operator=(const ReLuNode& other) = delete; - - ReLuNode(ReLuNode&& other) = default; - ReLuNode& operator=(ReLuNode&& other) = default; - - ~ReLuNode() noexcept = default; - std::vector> backward(const Tensor& upstreamGrad) override; }; } diff --git a/src/backend/computational_graph/activation_functions/sigmoid_node.cpp b/src/backend/computational_graph/activation_functions/sigmoid_node.cpp new file mode 100644 index 0000000..5873724 --- /dev/null +++ b/src/backend/computational_graph/activation_functions/sigmoid_node.cpp @@ -0,0 +1,35 @@ +/** + * @file sigmoid_node.cpp + * @author Robert Baumgartner 
(r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "sigmoid_node.h" + +#include + +using namespace std; +using namespace cgraph; + +vector> SigmoidNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + constexpr ftype zero = 0.0; + + auto res = make_shared(upstreamGrad.getDims(), upstreamGrad.getDevice(), false); + + // s is result from forward pass sigmoid + auto derivative = [](ftype s){ + return s * (1-s); + }; + + for(tensorSize_t i=0; iset(derivative((*sigmoid)[i]) * upstreamGrad[i], i); + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/activation_functions/sigmoid_node.h b/src/backend/computational_graph/activation_functions/sigmoid_node.h new file mode 100644 index 0000000..82e824d --- /dev/null +++ b/src/backend/computational_graph/activation_functions/sigmoid_node.h @@ -0,0 +1,32 @@ +/** + * @file relu_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-15 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" +#include "data_modeling/tensor.h" + +#include +#include + +namespace cgraph { + class SigmoidNode final : public GraphNode { + private: + // cache the result of the forward function + std::shared_ptr sigmoid; + + public: + explicit SigmoidNode(std::shared_ptr t, std::shared_ptr sigmoid) + : GraphNode({std::move(t)}), sigmoid{std::move(sigmoid)} {} + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} diff --git a/src/backend/computational_graph/activation_functions/softmax_node.cpp b/src/backend/computational_graph/activation_functions/softmax_node.cpp new file mode 100644 index 0000000..8603355 --- /dev/null +++ b/src/backend/computational_graph/activation_functions/softmax_node.cpp @@ -0,0 +1,45 @@ +/** + * @file softmax_node.cpp + * @author Robert 
Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-15 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "softmax_node.h" + +#include "data_modeling/tensor_functions.h" + +#include + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > SoftmaxNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + + const auto& yPred = parents[0]; + auto res = make_shared(yPred->createEmptyCopy()); + + const auto bSize = yPred->getDims()[0]; + assert(bSize>0); + + for(tensorDim_t b=0; bgetDims()[1]; i++){ + ftype grad = 0; + const ftype yi = softmax->get(b, i); + + for(tensorDim_t j=0; jgetDims()[1]; j++){ + ftype yj = softmax->get(b, j); + ftype jacobian = (i==j) ? yi*(1-yj) : -yi*yj; + grad += upstreamGrad.get(b, j) * jacobian; + } + res->set(grad, b, i); + } + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/activation_functions/softmax_node.h b/src/backend/computational_graph/activation_functions/softmax_node.h new file mode 100644 index 0000000..6c3c8d1 --- /dev/null +++ b/src/backend/computational_graph/activation_functions/softmax_node.h @@ -0,0 +1,31 @@ +/** + * @file softmax_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-15 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" +#include "utility/global_params.h" + +namespace cgraph { + class SoftmaxNode final : public GraphNode { + private: + const std::shared_ptr softmax; + + public: + explicit SoftmaxNode(std::shared_ptr t, std::shared_ptr softmax) + : GraphNode({std::move(t)}), softmax{std::move(softmax)} + { + assert(softmax->getSize()==parents[0]->getDims()[0]); + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/add_node.h 
b/src/backend/computational_graph/add_node.h deleted file mode 100644 index 99bc964..0000000 --- a/src/backend/computational_graph/add_node.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * @file add_node.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-03 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "graph_node.h" - -namespace graph { - class AddNode final : public GraphNode { - public: - explicit AddNode(std::shared_ptr t1, std::shared_ptr t2) - : GraphNode({std::move(t1), std::move(t2)}) {} - - AddNode(const AddNode& other) = delete; - AddNode& operator=(const AddNode& other) = delete; - - AddNode(AddNode&& other) = default; - AddNode& operator=(AddNode&& other) = default; - - ~AddNode() noexcept = default; - - std::vector> backward(const Tensor& upstreamGrad) override; - }; -} \ No newline at end of file diff --git a/src/backend/computational_graph/graph_node.h b/src/backend/computational_graph/graph_node.h index 67cb033..c8d3cec 100644 --- a/src/backend/computational_graph/graph_node.h +++ b/src/backend/computational_graph/graph_node.h @@ -18,17 +18,48 @@ #include -namespace graph { +// if GCC or Clang +#ifdef __GNUC__ +#include +#endif // __GNUC__ + +namespace cgraph { class GraphNode { protected: std::vector< std::shared_ptr > parents; explicit GraphNode(std::vector< std::shared_ptr > parents) : parents{std::move(parents)}{} public: + GraphNode(const GraphNode& other) = delete; + GraphNode& operator=(const GraphNode& other) = delete; + + GraphNode(GraphNode&& other) = default; + GraphNode& operator=(GraphNode&& other) = default; + + virtual ~GraphNode() noexcept = default; + virtual std::vector> backward(const Tensor& upstreamGrad) = 0; const auto& getParents() const noexcept { return parents; } + + virtual void print(std::ostream& os) const noexcept { + os << "\n"; + #ifdef __GNUC__ + // demangle name on gcc and clang + int status; + char* demangled = 
abi::__cxa_demangle(typeid(*this).name(), nullptr, nullptr, &status); + os << (status == 0 ? demangled : typeid(*this).name()); + std::free(demangled); + #else + os << typeid(*this).name(); + #endif + }; + + friend std::ostream& operator<<(std::ostream& os, const GraphNode& n) noexcept { + n.print(os); // calling vtable + return os; + } }; } diff --git a/src/backend/computational_graph/loss_functions/bce_node.cpp b/src/backend/computational_graph/loss_functions/bce_node.cpp new file mode 100644 index 0000000..add016f --- /dev/null +++ b/src/backend/computational_graph/loss_functions/bce_node.cpp @@ -0,0 +1,35 @@ +/** + * @file bce_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "bce_node.h" + +#include "data_modeling/tensor_functions.h" + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > BceNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + + const auto& yPred = parents[0]; + auto res = make_shared(yPred->createEmptyCopy()); + + ftype bSize = yPred->getDims()[0]; + for(tensorSize_t i=0; igetDims()[0]; i++){ + auto yi = (*yTrue)[i]; + auto yiHat = (*yPred)[i]; + + auto g = -yi/std::max(yiHat, epsBce) + (1-yi)/std::max(1-yiHat, epsBce); + res->set(g/bSize, i); + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/bce_node.h b/src/backend/computational_graph/loss_functions/bce_node.h new file mode 100644 index 0000000..25b5f62 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/bce_node.h @@ -0,0 +1,34 @@ +/** + * @file bce_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" +#include "utility/global_params.h" + +namespace cgraph { + class 
BceNode final : public GraphNode { + private: + const std::shared_ptr yTrue; + + public: + explicit BceNode(std::shared_ptr y, std::shared_ptr yPred) + : GraphNode({std::move(yPred)}), yTrue{std::move(y)} + { + assert(parents[0]->getDims()==yTrue->getDims()); + if(!parents[0]->getRequiresGrad()){ + std::__throw_invalid_argument("yPred must be a graph node"); + } + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/bce_sigmoid_node.cpp b/src/backend/computational_graph/loss_functions/bce_sigmoid_node.cpp new file mode 100644 index 0000000..998e110 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/bce_sigmoid_node.cpp @@ -0,0 +1,46 @@ +/** + * @file bce_sigmoid_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "bce_sigmoid_node.h" + +#include "data_modeling/tensor_functions.h" + +#include + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > BceSigmoidNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + + auto sigmoid = [](ftype x){ + constexpr ftype one = 1.0; + if(x>=0){ + return one / (one + exp(-x)); + } + auto e = exp(x); + return e / (one + e); + }; + + const auto& logits = parents[0]; + auto res = make_shared(logits->createEmptyCopy()); + + ftype bSize = logits->getDims()[0]; + for(tensorSize_t i=0; igetDims()[0]; i++){ + auto y = (*yTrue)[i]; + auto s = sigmoid((*logits)[i]); + + auto g = s - y; + res->set(g/bSize, i); + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/bce_sigmoid_node.h b/src/backend/computational_graph/loss_functions/bce_sigmoid_node.h new file mode 100644 index 0000000..f90059c --- /dev/null +++ b/src/backend/computational_graph/loss_functions/bce_sigmoid_node.h @@ -0,0 
+1,33 @@ +/** + * @file bce_sigmoid_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" + +namespace cgraph { + class BceSigmoidNode final : public GraphNode { + private: + const std::shared_ptr yTrue; + + public: + explicit BceSigmoidNode(std::shared_ptr y, std::shared_ptr logits) + : GraphNode({std::move(logits)}), yTrue{std::move(y)} + { + assert(parents[0]->getDims()==yTrue->getDims()); + if(!parents[0]->getRequiresGrad()){ + std::__throw_invalid_argument("yPred must be a graph node"); + } + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/crossentropy_node.cpp b/src/backend/computational_graph/loss_functions/crossentropy_node.cpp new file mode 100644 index 0000000..249de43 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/crossentropy_node.cpp @@ -0,0 +1,37 @@ +/** + * @file add_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-03 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "crossentropy_node.h" + +#include "data_modeling/tensor_functions.h" + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > CrossEntropyNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + + const auto& yPred = parents[0]; + auto res = make_shared(yPred->createEmptyCopy()); + + ftype bSize = yPred->getDims()[0]; + for(tensorDim_t i=0; igetDims()[0]; i++){ + for(tensorDim_t j=0; jgetDims()[1]; j++){ + auto yij = yTrue->get(i, j); + auto yijHat = yPred->get(i, j); + + auto g = -yij/std::max(yijHat, epsCrossentropy); + res->set(g/bSize, i, j); + } + } + + return {res}; +} \ No newline at end of file diff --git 
a/src/backend/computational_graph/loss_functions/crossentropy_node.h b/src/backend/computational_graph/loss_functions/crossentropy_node.h new file mode 100644 index 0000000..2644a8d --- /dev/null +++ b/src/backend/computational_graph/loss_functions/crossentropy_node.h @@ -0,0 +1,41 @@ +/** + * @file add_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-03 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" +#include "utility/global_params.h" + +namespace cgraph { + class CrossEntropyNode final : public GraphNode { + private: + const std::shared_ptr yTrue; + + public: + + /** + * @brief Expexted shapes are same as for CrossEntropyLoss. + * + * @param y shape (batchsize) + * @param yPred shape (batchsize, nclasses) + */ + explicit CrossEntropyNode(std::shared_ptr y, std::shared_ptr yPred) + : GraphNode({std::move(yPred)}), yTrue{std::move(y)} + { + assert(parents[0]->getDims()==yTrue->getDims()); + if(!parents[0]->getRequiresGrad()){ + std::__throw_invalid_argument("yPred must be a graph node"); + } + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.cpp b/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.cpp new file mode 100644 index 0000000..ca0d1c7 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.cpp @@ -0,0 +1,37 @@ +/** + * @file crossentropy_softmax_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "crossentropy_softmax_node.h" + +#include "module/activation_functions/softmax.h" + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > CrossEntropySoftmaxNode::backward(const Tensor& upstreamGrad) { + 
assert(!upstreamGrad.getRequiresGrad()); + + const auto& logits = parents[0]; + auto res = make_shared(logits->createEmptyCopy()); + + const auto softmax = module::Softmax(); + const auto s = softmax(*logits); + + ftype bSize = logits->getDims()[0]; + for(tensorSize_t b=0; bgetDims()[0]; b++){ + for(tensorSize_t i=0; igetDims()[1]; i++){ + auto g = s.get(b, i) - yTrue->get(b, i); + res->set(g / bSize, b, i); + } + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.h b/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.h new file mode 100644 index 0000000..17f0d15 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/crossentropy_softmax_node.h @@ -0,0 +1,33 @@ +/** + * @file crossentropy_softmax_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" + +namespace cgraph { + class CrossEntropySoftmaxNode final : public GraphNode { + private: + const std::shared_ptr yTrue; + + public: + explicit CrossEntropySoftmaxNode(std::shared_ptr y, std::shared_ptr logits) + : GraphNode({std::move(logits)}), yTrue{std::move(y)} + { + assert(parents[0]->getDims()==yTrue->getDims()); + if(!parents[0]->getRequiresGrad()){ + std::__throw_invalid_argument("yPred must be a graph node"); + } + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/rmse_node.cpp b/src/backend/computational_graph/loss_functions/rmse_node.cpp new file mode 100644 index 0000000..e3eb11e --- /dev/null +++ b/src/backend/computational_graph/loss_functions/rmse_node.cpp @@ -0,0 +1,39 @@ +/** + * @file rmse_node.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 
2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "rmse_node.h" + +#include "data_modeling/tensor_functions.h" + +#include + +using namespace std; +using namespace cgraph; + +vector< shared_ptr > RmseNode::backward(const Tensor& upstreamGrad) { + assert(!upstreamGrad.getRequiresGrad()); + constexpr ftype eps = 1e-9; + + const auto& yPred = parents[0]; + auto res = make_shared(yPred->createEmptyCopy()); + + ftype bSize = yPred->getDims()[0]; + for(tensorSize_t i=0; igetDims()[0]; i++){ + auto yi = (*yTrue)[i]; + auto yiHat = (*yPred)[i]; + + auto denom = rmse * bSize + eps; + auto g = (yiHat-yi) / denom; + res->set(g, i); + } + + return {res}; +} \ No newline at end of file diff --git a/src/backend/computational_graph/loss_functions/rmse_node.h b/src/backend/computational_graph/loss_functions/rmse_node.h new file mode 100644 index 0000000..62e5cc5 --- /dev/null +++ b/src/backend/computational_graph/loss_functions/rmse_node.h @@ -0,0 +1,35 @@ +/** + * @file rmse_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" +#include "utility/global_params.h" + +namespace cgraph { + class RmseNode final : public GraphNode { + private: + const std::shared_ptr yTrue; + ftype rmse; + + public: + explicit RmseNode(std::shared_ptr y, std::shared_ptr yPred, ftype rmse) + : GraphNode({std::move(yPred)}), yTrue{std::move(y)}, rmse{rmse} + { + assert(parents[0]->getDims()==yTrue->getDims()); + if(!parents[0]->getRequiresGrad()){ + std::__throw_invalid_argument("yPred must be a graph node"); + } + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/add_node.cpp b/src/backend/computational_graph/tensor_ops/add_node.cpp similarity index 51% rename from src/backend/computational_graph/add_node.cpp rename to 
src/backend/computational_graph/tensor_ops/add_node.cpp index 43427f5..77cfd0c 100644 --- a/src/backend/computational_graph/add_node.cpp +++ b/src/backend/computational_graph/tensor_ops/add_node.cpp @@ -11,11 +11,19 @@ #include "add_node.h" +#include "data_modeling/tensor_functions.h" + using namespace std; -using namespace graph; +using namespace cgraph; vector< shared_ptr > AddNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); - auto res = make_shared(upstreamGrad.createDeepCopy()); - return {res, res}; + auto weightGrad = make_shared(upstreamGrad.createDeepCopy()); + + if(broadcasted){ + auto biasGrad = make_shared(TensorFunctions::SumOverDims(*weightGrad)); + return {weightGrad, biasGrad}; + } + + return {weightGrad, weightGrad}; } \ No newline at end of file diff --git a/src/backend/computational_graph/tensor_ops/add_node.h b/src/backend/computational_graph/tensor_ops/add_node.h new file mode 100644 index 0000000..2402394 --- /dev/null +++ b/src/backend/computational_graph/tensor_ops/add_node.h @@ -0,0 +1,33 @@ +/** + * @file add_node.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-03 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "computational_graph/graph_node.h" + +namespace cgraph { + class AddNode final : public GraphNode { + private: + // if t2 has been a vector we broadcast t2 into t1, see Tensor::add() + bool broadcasted = false; + + public: + explicit AddNode(std::shared_ptr t1, std::shared_ptr t2) + : GraphNode({std::move(t1), std::move(t2)}) { + // t2 is either tensor of same size or 1D-vector as bias + assert(t1->getDims().nDims()>=t2->getDims().nDims()); + + broadcasted = parents[0]->getDims() != parents[1]->getDims(); + } + + std::vector> backward(const Tensor& upstreamGrad) override; + }; +} \ No newline at end of file diff --git a/src/backend/computational_graph/elementwise_mul_node.cpp 
b/src/backend/computational_graph/tensor_ops/elementwise_mul_node.cpp similarity index 95% rename from src/backend/computational_graph/elementwise_mul_node.cpp rename to src/backend/computational_graph/tensor_ops/elementwise_mul_node.cpp index fcdb130..22d39e7 100644 --- a/src/backend/computational_graph/elementwise_mul_node.cpp +++ b/src/backend/computational_graph/tensor_ops/elementwise_mul_node.cpp @@ -12,7 +12,7 @@ #include "elementwise_mul_node.h" using namespace std; -using namespace graph; +using namespace cgraph; vector> ElementwiseMulNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); diff --git a/src/backend/computational_graph/elementwise_mul_node.h b/src/backend/computational_graph/tensor_ops/elementwise_mul_node.h similarity index 57% rename from src/backend/computational_graph/elementwise_mul_node.h rename to src/backend/computational_graph/tensor_ops/elementwise_mul_node.h index 81203fd..ffa8038 100644 --- a/src/backend/computational_graph/elementwise_mul_node.h +++ b/src/backend/computational_graph/tensor_ops/elementwise_mul_node.h @@ -11,22 +11,14 @@ #pragma once -#include "graph_node.h" +#include "computational_graph/graph_node.h" -namespace graph { +namespace cgraph { class ElementwiseMulNode final : public GraphNode { public: explicit ElementwiseMulNode(std::shared_ptr t1, std::shared_ptr t2) : GraphNode({std::move(t1), std::move(t2)}) {} - ElementwiseMulNode(const ElementwiseMulNode& other) = delete; - ElementwiseMulNode& operator=(const ElementwiseMulNode& other) = delete; - - ElementwiseMulNode(ElementwiseMulNode&& other) = default; - ElementwiseMulNode& operator=(ElementwiseMulNode&& other) = default; - - ~ElementwiseMulNode() noexcept = default; - std::vector> backward(const Tensor& upstreamGrad) override; }; } diff --git a/src/backend/computational_graph/getter_node.cpp b/src/backend/computational_graph/tensor_ops/getter_node.cpp similarity index 80% rename from 
src/backend/computational_graph/getter_node.cpp rename to src/backend/computational_graph/tensor_ops/getter_node.cpp index e1a3ac0..ded3640 100644 --- a/src/backend/computational_graph/getter_node.cpp +++ b/src/backend/computational_graph/tensor_ops/getter_node.cpp @@ -12,7 +12,7 @@ #include "getter_node.h" using namespace std; -using namespace graph; +using namespace cgraph; vector< shared_ptr > GetterNode::backward(const Tensor& upstreamGrad) { // upstreamGrad is scalar by definition @@ -20,14 +20,14 @@ vector< shared_ptr > GetterNode::backward(const Tensor& upstreamGrad) { auto res = make_shared(parents[0]->getDims(), parents[0]->getDevice(), false); for(tensorSize_t i=0; igetSize(); i++){ - res->setItem(0, i); + res->set(0, i); } if(std::holds_alternative(idx)){ - res->setItem(upstreamGrad.getItem(0), std::get(idx)); + res->set(upstreamGrad.get(0), std::get(idx)); } else if(std::holds_alternative(idx)){ - res->setItem(upstreamGrad.getItem(0), std::get(idx)); + res->set(upstreamGrad.get(0), std::get(idx)); } else{ __throw_runtime_error("Idx variant in unexpected state"); diff --git a/src/backend/computational_graph/getter_node.h b/src/backend/computational_graph/tensor_ops/getter_node.h similarity index 75% rename from src/backend/computational_graph/getter_node.h rename to src/backend/computational_graph/tensor_ops/getter_node.h index e55b2d5..c0bdb45 100644 --- a/src/backend/computational_graph/getter_node.h +++ b/src/backend/computational_graph/tensor_ops/getter_node.h @@ -11,12 +11,12 @@ #pragma once -#include "graph_node.h" +#include "computational_graph/graph_node.h" #include #include -namespace graph{ +namespace cgraph{ /** * @brief When calling a get function, say as in * loss += myTensor[i], then we need to build a graph in between @@ -36,13 +36,5 @@ namespace graph{ explicit GetterNode(std::shared_ptr t, const multiDimIdx_t& idx) : GraphNode({std::move(t)}), idx{idx} {} - GetterNode(const GetterNode& other) = delete; - GetterNode& operator=(const 
GetterNode& other) = delete; - - GetterNode(GetterNode&& other) = default; - GetterNode& operator=(GetterNode&& other) = default; - - ~GetterNode() noexcept = default; - std::vector> backward(const Tensor& upstreamGrad) override; };} diff --git a/src/backend/computational_graph/graph_creation.cpp b/src/backend/computational_graph/tensor_ops/graph_creation.cpp similarity index 57% rename from src/backend/computational_graph/graph_creation.cpp rename to src/backend/computational_graph/tensor_ops/graph_creation.cpp index 1955493..1a28a08 100644 --- a/src/backend/computational_graph/graph_creation.cpp +++ b/src/backend/computational_graph/tensor_ops/graph_creation.cpp @@ -19,91 +19,92 @@ using namespace std; -shared_ptr graph::mul(const shared_ptr left, const shared_ptr right) { +shared_ptr cgraph::mul(const shared_ptr left, const shared_ptr right) { auto res = make_shared((*left) * (*right)); if(left->getRequiresGrad() || right->getRequiresGrad()){ - res->setCgNode(make_shared(left, right)); + res->setCgNode(make_shared(left, right)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::add(const shared_ptr left, const shared_ptr right) { +shared_ptr cgraph::add(const shared_ptr left, const shared_ptr right) { auto res = make_shared(*left + *right); if(left->getRequiresGrad() || right->getRequiresGrad()){ - res->setCgNode(make_shared(left, right)); + res->setCgNode(make_shared(left, right)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::matmul(const shared_ptr left, const shared_ptr right) { +shared_ptr cgraph::matmul(const shared_ptr left, const shared_ptr right) { auto res = make_shared(left->matmul(*right)); if(left->getRequiresGrad() || right->getRequiresGrad()){ - res->setCgNode(make_shared(left, right)); + res->setCgNode(make_shared(left, right)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::mul(const shared_ptr t, ftype scalar) { +shared_ptr cgraph::mul(const shared_ptr t, ftype scalar) { auto res = 
make_shared((*t) * scalar); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t, scalar)); + res->setCgNode(std::make_shared(t, scalar)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::mul(ftype scalar, const shared_ptr t) { - return graph::mul(t, scalar); +shared_ptr cgraph::mul(ftype scalar, const shared_ptr t) { + return cgraph::mul(t, scalar); } -shared_ptr graph::add(const shared_ptr t, ftype scalar) { +shared_ptr cgraph::add(const shared_ptr t, ftype scalar) { auto res = make_shared((*t) + scalar); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t)); + res->setCgNode(std::make_shared(t)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::add(ftype scalar, const shared_ptr t) { - return graph::add(t, scalar); +shared_ptr cgraph::add(ftype scalar, const shared_ptr t) { + return cgraph::add(t, scalar); } -shared_ptr graph::sub(const shared_ptr t, ftype scalar) { +shared_ptr cgraph::sub(const shared_ptr t, ftype scalar) { auto res = make_shared((*t) - scalar); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t)); + res->setCgNode(std::make_shared(t)); assert(res->getRequiresGrad()); } return res; } -shared_ptr graph::div(const shared_ptr t, ftype scalar) { +shared_ptr cgraph::div(const shared_ptr t, ftype scalar) { auto res = make_shared((*t) / scalar); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t, 1 / scalar)); + constexpr ftype eps = 1e-9; + res->setCgNode(std::make_shared(t, 1/std::max(scalar, eps))); assert(res->getRequiresGrad()); } return res; } /** - * @brief Special linear indexing, see getItem() overloads in tensor. + * @brief Special linear indexing, see get() overloads in tensor. * Used to keep the computational graph intact. * E.g. if we have something like * * loss = loss + other.get(i), we need to make sure get(i) can map to computational graph. 
*/ -shared_ptr graph::get(const shared_ptr& t, tensorSize_t idx) { - ftype val = t->getItem(idx); +shared_ptr cgraph::get(const shared_ptr& t, tensorSize_t idx) { + ftype val = t->get(idx); auto res = make_shared(std::vector{1}, std::vector{val}, t->getDevice()); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t, idx)); + res->setCgNode(std::make_shared(t, idx)); assert(res->getRequiresGrad()); } return res; @@ -115,12 +116,12 @@ shared_ptr graph::get(const shared_ptr& t, tensorSize_t idx) { * * loss = loss + other.get(i), we need to make sure get(i) can map to computational graph. */ -shared_ptr graph::get(const shared_ptr& t, const vector& idx) { - ftype val = t->getItem(std::move(idx)); +shared_ptr cgraph::get(const shared_ptr& t, const vector& idx) { + ftype val = t->get(std::move(idx)); auto res = make_shared(std::vector{1}, std::vector{val}, t->getDevice()); if(t->getRequiresGrad()){ - res->setCgNode(std::make_shared(t, idx)); + res->setCgNode(std::make_shared(t, idx)); assert(res->getRequiresGrad()); } return res; @@ -129,11 +130,11 @@ shared_ptr graph::get(const shared_ptr& t, const vector graph::sumTensor(const shared_ptr t) { +shared_ptr cgraph::sumTensor(const shared_ptr t) { auto res = make_shared(std::vector{1}, std::vector{0.0}, t->getDevice(), t->getRequiresGrad()); for(tensorSize_t i=0; igetSize(); i++){ - res = graph::add(res, graph::get(t, i)); + res = cgraph::add(res, cgraph::get(t, i)); } return res; } \ No newline at end of file diff --git a/src/backend/computational_graph/graph_creation.h b/src/backend/computational_graph/tensor_ops/graph_creation.h similarity index 98% rename from src/backend/computational_graph/graph_creation.h rename to src/backend/computational_graph/tensor_ops/graph_creation.h index f68cb4c..9decd8b 100644 --- a/src/backend/computational_graph/graph_creation.h +++ b/src/backend/computational_graph/tensor_ops/graph_creation.h @@ -15,7 +15,7 @@ #include -namespace graph { +namespace cgraph { // Artithmetic 
operations std::shared_ptr mul(const std::shared_ptr left, const std::shared_ptr right); std::shared_ptr mul(const std::shared_ptr left, ftype scalar); diff --git a/src/backend/computational_graph/matmul_node.cpp b/src/backend/computational_graph/tensor_ops/matmul_node.cpp similarity index 95% rename from src/backend/computational_graph/matmul_node.cpp rename to src/backend/computational_graph/tensor_ops/matmul_node.cpp index fc24fd4..2237026 100644 --- a/src/backend/computational_graph/matmul_node.cpp +++ b/src/backend/computational_graph/tensor_ops/matmul_node.cpp @@ -12,7 +12,7 @@ #include "matmul_node.h" using namespace std; -using namespace graph; +using namespace cgraph; vector> MatMulNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); diff --git a/src/backend/computational_graph/matmul_node.h b/src/backend/computational_graph/tensor_ops/matmul_node.h similarity index 62% rename from src/backend/computational_graph/matmul_node.h rename to src/backend/computational_graph/tensor_ops/matmul_node.h index 7fa94bf..6758602 100644 --- a/src/backend/computational_graph/matmul_node.h +++ b/src/backend/computational_graph/tensor_ops/matmul_node.h @@ -11,24 +11,16 @@ #pragma once -#include "graph_node.h" +#include "computational_graph/graph_node.h" #include -namespace graph { +namespace cgraph { class MatMulNode final : public GraphNode { public: explicit MatMulNode(std::shared_ptr t1, std::shared_ptr t2) : GraphNode({std::move(t1), std::move(t2)}) {} - MatMulNode(const MatMulNode& other) = delete; - MatMulNode& operator=(const MatMulNode& other) = delete; - - MatMulNode(MatMulNode&& other) = default; - MatMulNode& operator=(MatMulNode&& other) = default; - - ~MatMulNode() noexcept = default; - std::vector> backward(const Tensor& upstreamGrad) override; }; } diff --git a/src/backend/computational_graph/scalar_op_nodes.cpp b/src/backend/computational_graph/tensor_ops/scalar_op_nodes.cpp similarity index 69% rename from 
src/backend/computational_graph/scalar_op_nodes.cpp rename to src/backend/computational_graph/tensor_ops/scalar_op_nodes.cpp index 05a3643..e0f52f0 100644 --- a/src/backend/computational_graph/scalar_op_nodes.cpp +++ b/src/backend/computational_graph/tensor_ops/scalar_op_nodes.cpp @@ -14,19 +14,19 @@ #include using namespace std; -using namespace graph; +using namespace cgraph; -vector> graph::ScalarAddNode::backward(const Tensor& upstreamGrad) { +vector> cgraph::ScalarAddNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); return {make_shared(upstreamGrad.createDeepCopy())}; } -vector> graph::ScalarMulNode::backward(const Tensor& upstreamGrad) { +vector> cgraph::ScalarMulNode::backward(const Tensor& upstreamGrad) { assert(!upstreamGrad.getRequiresGrad()); auto res = make_shared(upstreamGrad.createDeepCopy()); for(tensorSize_t i=0; igetSize(); i++){ - res->setItem(res->getItem(i) * factor, i); + res->set(res->get(i) * factor, i); } return {std::move(res)}; } \ No newline at end of file diff --git a/src/backend/computational_graph/scalar_op_nodes.h b/src/backend/computational_graph/tensor_ops/scalar_op_nodes.h similarity index 76% rename from src/backend/computational_graph/scalar_op_nodes.h rename to src/backend/computational_graph/tensor_ops/scalar_op_nodes.h index 5a6588e..13cb067 100644 --- a/src/backend/computational_graph/scalar_op_nodes.h +++ b/src/backend/computational_graph/tensor_ops/scalar_op_nodes.h @@ -11,9 +11,9 @@ #pragma once -#include "graph_node.h" +#include "computational_graph/graph_node.h" -namespace graph { +namespace cgraph { class ScalarAddNode final : public GraphNode { public: explicit ScalarAddNode(std::shared_ptr t) @@ -38,14 +38,6 @@ namespace graph { explicit ScalarMulNode(std::shared_ptr t, ftype factor) : GraphNode({std::move(t)}), factor{factor} {} - ScalarMulNode(const ScalarMulNode& other) = delete; - ScalarMulNode& operator=(const ScalarMulNode& other) = delete; - - ScalarMulNode(ScalarMulNode&& 
other) = default; - ScalarMulNode& operator=(ScalarMulNode&& other) = default; - - ~ScalarMulNode() noexcept = default; - std::vector> backward(const Tensor& upstreamGrad) override; }; } \ No newline at end of file diff --git a/src/backend/computational_graph/topological_sort.cpp b/src/backend/computational_graph/topological_sort.cpp index d6250ca..6a4266d 100644 --- a/src/backend/computational_graph/topological_sort.cpp +++ b/src/backend/computational_graph/topological_sort.cpp @@ -20,7 +20,7 @@ #include using namespace std; -using namespace graph; +using namespace cgraph; #ifndef NDEBUG /** @@ -133,7 +133,7 @@ vector< Tensor* > TopologicalSort::reverseSort(Tensor* root) { auto pushParentsWithGraphNode = [&nodeQueue, &edgeCounts](Tensor* t){ const auto& parents = t->cgNode->getParents(); - for(const auto& parent: parents){ // TODO: check for requiresGrad to save runtime? + for(const auto& parent: parents){ if(!parent->cgNode) continue; @@ -146,12 +146,15 @@ vector< Tensor* > TopologicalSort::reverseSort(Tensor* root) { }; // pass 2: topological sort based on Kahn's algorithm - vector< Tensor* > res; // TODO: reserve capacity to save runtime? + vector< Tensor* > res; + res.reserve(nodeQueue.size()); + nodeQueue.push(root); while(!nodeQueue.empty()){ auto tensorPtr = nodeQueue.front(); nodeQueue.pop(); - + assert(tensorPtr->cgNode); + if(edgeCounts[tensorPtr]==0){ pushParentsWithGraphNode(tensorPtr); res.push_back(tensorPtr); diff --git a/src/backend/computational_graph/topological_sort.h b/src/backend/computational_graph/topological_sort.h index 96bba77..5c60ed5 100644 --- a/src/backend/computational_graph/topological_sort.h +++ b/src/backend/computational_graph/topological_sort.h @@ -16,7 +16,7 @@ class Tensor; // to break circular dependency -namespace graph { +namespace cgraph { /** * @brief Topological sort class. 
* diff --git a/src/backend/data_modeling/device.cpp b/src/backend/data_modeling/device.cpp new file mode 100644 index 0000000..a7726ae --- /dev/null +++ b/src/backend/data_modeling/device.cpp @@ -0,0 +1,26 @@ +/** + * @file device.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-08 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "device.h" + +#include + +const char* DeviceToString(Device d) { + switch(d){ + case Device::CPU: + return "CPU"; + case Device::CUDA: + return "CUDA"; + } + + std::__throw_invalid_argument("Unknown device encountered"); + return ""; // suppress +} \ No newline at end of file diff --git a/src/backend/data_modeling/device.h b/src/backend/data_modeling/device.h new file mode 100644 index 0000000..65f83a5 --- /dev/null +++ b/src/backend/data_modeling/device.h @@ -0,0 +1,19 @@ +/** + * @file device.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-08 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +enum class Device { + CPU, + CUDA +}; + +const char* DeviceToString(Device d); \ No newline at end of file diff --git a/src/backend/data_modeling/dim_type.cpp b/src/backend/data_modeling/dim_type.cpp index af77c24..f7d1804 100644 --- a/src/backend/data_modeling/dim_type.cpp +++ b/src/backend/data_modeling/dim_type.cpp @@ -13,6 +13,7 @@ #include "utility/safe_arithmetics.h" #include +#include using namespace std; @@ -38,10 +39,7 @@ tensorDim_t Dimension::multVector(const std::vector& dims) const no void Dimension::resize(const std::vector& dims) { this->dims = dims; size = multVector(dims); - - if(size==0){ - __throw_invalid_argument("Tensor-Dims must all be greater than 0."); - } + assert(size>0); } /** @@ -55,10 +53,7 @@ void Dimension::swap(const tensorDim_t dim1, const tensorDim_t dim2) { Dimension::Dimension(const vector& dims) : dims{dims} { size = multVector(dims); - - if(size==0){ - 
__throw_invalid_argument("Tensor-Dims must all be greater than 0."); - } + assert(size>0); } Dimension::Dimension(const Dimension& other) : dims{other.dims}, size{other.size} { } @@ -82,15 +77,42 @@ Dimension& Dimension::operator=(Dimension&& other) noexcept { return *this; } -ostream& operator<<(ostream& os, const Dimension& d) noexcept { - os << "("; - for(int i=0; i newDims; + newDims.reserve(dims.size() - 1); + newDims.insert(newDims.end(), dims.begin(), dims.begin() + idx); + newDims.insert(newDims.end(), dims.begin() + idx + 1, dims.end()); + + return Dimension(newDims); +} + +ostream& operator<<(ostream& os, const Dimension& d) noexcept { + if(d.size>0){ + os << "\n("; + for(int i=0; i #include -template -concept is_valid_dim = requires(T x) { - requires std::is_integral_v>; - requires std::convertible_to, tensorDim_t>; - x >= 0; -}; - class Dimension final { private: std::vector dims; @@ -47,15 +40,20 @@ class Dimension final { ~Dimension() noexcept = default; + Dimension collapseDimension(int idx) const; + void resize(const std::vector& dims); - + tensorSize_t getSize() const noexcept { - assert(size!=0); return size; } - tensorDim_t getItem(int idx) const { - assert(size!=0); + tensorDim_t get(int idx) const { + return (*this)[idx]; + } + + tensorDim_t operator[](int idx) const { + assert(size>0); if(idx<0){ idx = dims.size() + idx; // -1 is last idx, -2 second last and so forth } @@ -70,7 +68,6 @@ class Dimension final { void swap(const tensorDim_t dim1, const tensorDim_t dim2); size_t nDims() const noexcept { - assert(size!=0); return dims.size(); } @@ -80,7 +77,6 @@ class Dimension final { } bool operator==(const std::vector& other) const { - assert(size!=0); return this->dims == other; } @@ -92,5 +88,5 @@ class Dimension final { return !(*this == other); } - friend std::ostream& operator<<(std::ostream& os, const Dimension& d) noexcept; + friend std::ostream& operator<<(std::ostream& os, const Dimension& d) noexcept; }; \ No newline at end of file 
diff --git a/src/backend/data_modeling/tensor.cpp b/src/backend/data_modeling/tensor.cpp index 04a94aa..34ec548 100644 --- a/src/backend/data_modeling/tensor.cpp +++ b/src/backend/data_modeling/tensor.cpp @@ -60,18 +60,63 @@ Tensor::tensorValues_t::~tensorValues_t() noexcept { * do not create a deepcopy, but construct another pointer pointing to the same piece * of memory. */ -void Tensor::tensorValues_t::copyValues(Tensor::tensorValues_t& target, - const Tensor::tensorValues_t& origin) { - assert(origin.device==target.device && origin.size==target.size); +void Tensor::tensorValues_t::copyValues(Tensor::tensorValues_t& target) const { + assert(device==target.device && size==target.size); - switch(origin.device){ + switch(device){ case Device::CPU: - for(tensorSize_t i=0; i= high - low); + + switch(device){ + case Device::CPU: + for(tensorSize_t i=0; i indices, + const tensorSize_t sizeOfDim) const { + assert(target.size >= sizeOfDim * indices.size()); + + switch(device){ + case Device::CPU: { + tensorSize_t targetOffset = 0; + for(tensorDim_t idx: indices){ + tensorSize_t thisOffset = idx * sizeOfDim; + copyValues(target, thisOffset, thisOffset+sizeOfDim, targetOffset); + targetOffset += sizeOfDim; + } + break; + } + case Device::CUDA: + __throw_runtime_error("CUDA not implemented for slicing"); + break; } } @@ -116,6 +161,7 @@ Tensor::tensorValues_t::operator+=(const Tensor::tensorValues_t& other) { break; case Device::CUDA: __throw_invalid_argument("CUDA not supported yet for += operation"); + break; } return *this; @@ -141,7 +187,7 @@ ftype Tensor::tensorValues_t::operator[](const tensorSize_t idx) const { return values[idx]; } -void Tensor::tensorValues_t::setItem(ftype v, tensorSize_t idx) { +void Tensor::tensorValues_t::set(ftype v, tensorSize_t idx) { if(idx >= size) throw std::out_of_range("Out of range for tensor"); @@ -156,7 +202,7 @@ void Tensor::tensorValues_t::setItem(ftype v, tensorSize_t idx) { __throw_runtime_error("Should never reach here."); } 
-ftype Tensor::tensorValues_t::getItem(tensorSize_t idx) { +ftype Tensor::tensorValues_t::get(tensorSize_t idx) { if(idx >= size) throw std::out_of_range("Out of range for tensor"); @@ -206,13 +252,13 @@ Tensor Tensor::createEmptyCopy() const { return res; } /** - * @brief Does a deep copy. + * @brief Does a deep copy, but omits gradient and computational graph information. */ Tensor Tensor::createDeepCopy() const { assert(!grads || (grads && !grads->requiresGrad)); // gradient should not require gradient auto res = Tensor(dims, values->getDevice(), requiresGrad); - tensorValues_t::copyValues(*res.values, *this->values); + values->copyValues(*res.values); /* if(grads){ res.grads = make_shared( grads->createDeepCopy() ); // TODO: do we want this? @@ -247,98 +293,64 @@ Tensor Tensor::multiplyScalar(const Tensor& scalar, const Tensor& right) noexcep * The check of whether they do or not is to be performed by the surrounding * network class object instance upon construction. */ -Tensor Tensor::matMulImpl(const Tensor& left, const Tensor& right) const { - if(left.dims.getItem(-1) != right.dims.getItem(-2)){ +Tensor Tensor::matMulImpl(const Tensor& left, const Tensor& right) { + if(left.dims.get(-1) != right.dims.get(-2)){ __throw_runtime_error("Tensor dimensions do not match"); } - if(abs(static_cast(right.dims.nDims()) - static_cast(left.dims.nDims())) > 1){ - auto str = "Tensor dimension assumptions violated. See file 'assumption_matrices.md'."; - __throw_invalid_argument(str); - } - + // broadcasting auto resDims = left.dims.nDims() > right.dims.nDims() ? 
left.dims.toVector() : right.dims.toVector(); - resDims[resDims.size()-2] = left.dims.getItem(-2); // rows - resDims[resDims.size()-1] = right.dims.getItem(-1); // cols + resDims[resDims.size()-2] = left.dims.get(-2); // rows + resDims[resDims.size()-1] = right.dims.get(-1); // cols - Tensor res(resDims, values->getDevice(), false); + Tensor res(resDims, left.values->getDevice(), false); // sizes of the 2D matrices respectively - const tensorSize_t leftSize = left.dims.getItem(-1) * left.dims.getItem(-2); - const tensorSize_t rightSize = right.dims.getItem(-1) * right.dims.getItem(-2); - const tensorSize_t resSize = left.dims.getItem(-2) * right.dims.getItem(-1); + const tensorSize_t leftSize = left.dims.get(-1) * left.dims.get(-2); + const tensorSize_t rightSize = right.dims.get(-1) * right.dims.get(-2); + const tensorSize_t resSize = left.dims.get(-2) * right.dims.get(-1); tensorSize_t leftOffset = 0; tensorSize_t rightOffset = 0; tensorSize_t resOffset = 0; - // lambda expected to get inlined by compiler - auto multiplyNTimes = [&](const tensorDim_t n){ - for(tensorDim_t i=0; igetSize() / resSize; // total size / size of 2D matrix - multiplyNTimes(nMultiplications); - } - else if(left.dims.nDims() > right.dims.nDims()) { - const auto nBatches = left.dims.getItem(0); - - for(tensorDim_t batch = 0; batch < nBatches; batch++){ - const auto nMultsPerBatch = res.values->getSize() / (nBatches * resSize); - multiplyNTimes(nMultsPerBatch); - rightOffset = 0; - } - } - else { - const auto nBatches = right.dims.getItem(0); + while(leftOffset < left.getSize()){ + matMul2DCpu(res, left, right, resOffset, leftOffset, rightOffset); - for(tensorDim_t batch = 0; batch < nBatches; batch++){ - const auto nMultsPerBatch = res.values->getSize() / (nBatches * resSize); - multiplyNTimes(nMultsPerBatch); - leftOffset = 0; - } + leftOffset += leftSize; + rightOffset += rightSize; + resOffset += resSize; } return res; } /** - * @brief Name says it all. 
Inplace operation on res + * @brief Name says it all. Inplace operation on res. */ void Tensor::matMul2DCpu(Tensor& res, const Tensor& left, const Tensor& right, const tensorSize_t resOffset, const tensorSize_t leftOffset, const tensorSize_t rightOffset) { + + const auto nRowsLeft = static_cast(left.dims.get(-2)); + const auto nColsLeft = static_cast(left.dims.get(-1)); + const auto nRowsRight = static_cast(right.dims.get(-2)); + const auto nColsRight = static_cast(right.dims.get(-1)); - const auto nRowsLeft = static_cast(left.dims.getItem(-2)); - const auto nColsLeft = static_cast(left.dims.getItem(-1)); - const auto nRowsRight = static_cast(right.dims.getItem(-2)); - const auto nColsRight = static_cast(right.dims.getItem(-1)); + tensorSize_t resIdx = resOffset; + for(tensorSize_t lrow=0; lrowgetDevice()==other.values->getDevice()); if(values->getDevice()==Device::CUDA){ __throw_invalid_argument("Multiplication not implemented on CUDA"); } @@ -356,36 +369,45 @@ Tensor Tensor::matmul(const Tensor& other) const { __throw_runtime_error("Tensors on different devices."); } - // TODO: check what to do about these two gradients and if you want broadcasting here at all - if(other.dims.getSize()==1){ - return multiplyScalar(other, *this); - } - else if(dims.getSize()==1){ - return multiplyScalar(*this, other); - } - return matMulImpl(*this, other); } /** - * @brief Elementise addition. + * @brief Addition of two tensors. This works in two ways: + * 1. Shapes of the two tensors are identical. In this case it is simple + * elementwise addition. + * 2. The second tensor is a vector. In this case broadcast it. We assume + * other.dims == (dimN) && this->dims == (dim0, dim1,..., dimN). 
*/ Tensor Tensor::operator+(const Tensor& other) const { if(values->getDevice()==Device::CUDA){ - __throw_invalid_argument("Multiplication not implemented on CUDA"); + __throw_invalid_argument("Addition not implemented on CUDA"); } - if(this->dims != other.dims){ - __throw_invalid_argument("Tensors need same dimensions"); + if(this->dims != other.dims && + !(other.dims.nDims() == 1 && other.dims.get(0) == dims.get(-1))){ + __throw_invalid_argument("Tensors need matching dimensions"); } else if(values->getDevice()!=other.values->getDevice()){ __throw_runtime_error("Tensors on different devices."); } - assert(values->getSize()==other.values->getSize()); - Tensor res(dims, values->getDevice(), false); - for(tensorSize_t i=0; igetSize(); i++){ - (*res.values)[i] = (*values)[i] + (*other.values)[i]; + Tensor res(dims, values->getDevice()); + + if(dims==other.dims){ + // elementwise add + for(tensorSize_t i=0; igetSize(); i++){ + (*res.values)[i] = (*values)[i] + (*other.values)[i]; + } + } + else { [[likely]] + // broadcasted add + const auto stride = static_cast(other.dims.get(0)); + for(tensorSize_t offset=0; offsetgetSize(); offset+=stride){ + for(tensorSize_t i=0; igetDevice()==other.values->getDevice()); if(values->getDevice()==Device::CUDA){ __throw_invalid_argument("Multiplication not implemented on CUDA"); } // TODO: check what to do about these two gradients and if you want broadcasting here at all - if(other.dims.getSize()==1){ +/* if(other.dims.getSize()==1){ return multiplyScalar(other, *this); } else if(dims.getSize()==1){ return multiplyScalar(*this, other); - } + } */ if(this->dims != other.dims){ __throw_invalid_argument("Tensors need same dimensions"); @@ -501,26 +524,25 @@ void Tensor::backward() { } } - vector sortedTensors = graph::TopologicalSort::reverseSort(this); + vector sortedTensors = cgraph::TopologicalSort::reverseSort(this); for(auto tPtr: sortedTensors){ auto& tensor = *tPtr; assert(tensor.grads && !tensor.grads->requiresGrad); // gradient 
should not require grad - if(tensor.cgNode){ - auto incomingGrads = tensor.cgNode->backward(*tensor.grads); - const auto& parents = tensor.cgNode->getParents(); + auto incomingGrads = tensor.cgNode->backward(*tensor.grads); - for(size_t i=0; irequiresGrad){ - continue; - } - else if(!parent->grads){ - parent->grads = incomingGrads[i]; - } - else{ - *parent->grads->values += *incomingGrads[i]->values; - } + const auto& parents = tensor.cgNode->getParents(); + + for(size_t i=0; irequiresGrad){ + continue; + } + else if(!parent->grads){ + parent->grads = incomingGrads[i]; + } + else{ + *parent->grads->values += *incomingGrads[i]->values; } } } @@ -529,7 +551,7 @@ void Tensor::backward() { /** * @brief Get gradients */ -shared_ptr Tensor::getGrads() const { +shared_ptr Tensor::getGrads() const { if(!grads){ __throw_runtime_error("Tensor has no gradients."); } @@ -574,7 +596,7 @@ void Tensor::transposeImpl(Tensor& target, const int dim1, const int dim2) const // strides for source tensorSize_t stride = 1; for(int d = numDims - 1; d >= 0; d--) { - dimSizes[d] = source.dims.getItem(d); + dimSizes[d] = source.dims.get(d); sourceStrides[d] = stride; stride *= dimSizes[d]; } @@ -647,8 +669,8 @@ void Tensor::transposeImpl2D(Tensor& target, const int dim1, const int dim2) con transposedValues->resize(source.values->getSize()); tensorSize_t resIdx = 0; - for(tensorSize_t smallDimCount=0; smallDimCount&& newOrder) noexcept { /** * @brief Populates the tensor with value. */ -void Tensor::reset(const ftype x) { +void Tensor::reset(const ftype x) noexcept { for(tensorSize_t i=0; igetSize(); i++){ (*values)[i] = x; } @@ -745,8 +767,7 @@ void Tensor::reset(const ftype x) { /** * @brief Populates the tensor with values drawn according to initializer. 
*/ -void Tensor::reset(const utility::InitClass ic) { - const auto init = utility::InitializerFactory::getInitializer(ic); +void Tensor::reset(const shared_ptr init) noexcept { for(tensorSize_t i=0; igetSize(); i++){ (*values)[i] = init->drawNumber(); } @@ -776,6 +797,46 @@ Device Tensor::getDevice() const noexcept { return values->getDevice(); } +/** + * @brief Gets a slice of this tensor. + * + * Quick and dirty implementation for now: Copies and + * returns. + * + * @param low Lower idx, inclusive bound. + * @param high Upper idx, non-inclusive bound. + * @return Tensor The slices tensor. + */ +Tensor Tensor::getSlice(tensorSize_t low, tensorSize_t high) const { + if(high<=low){ + __throw_invalid_argument("Upper bound most be larger than lower bound."); + } + + auto resDims = dims.toVector(); + resDims[0] = high-low; + Tensor res(std::move(resDims), values->getDevice(), false); + values->copyValues(*res.values, low, high, 0); + return res; +} + +/** + * @brief Like overload, but gets the slicing according to the + * indices given by the argument. Used e.g. in batch-size. + * + * @param indices A list of indices + * @return Tensor The result. + */ +Tensor Tensor::getSlice(span indices) const { + assert(indices.size()>0); + + auto resDims = dims.toVector(); + resDims[0] = indices.size(); + + Tensor res(std::move(resDims), values->getDevice(), false); + values->copyValues(*res.values, indices, getDimOffset(0, resDims)); + return res; +} + /** * @brief Prints only sample of up to 2D tensors. 
*/ @@ -784,9 +845,9 @@ void printValuesCpu(std::ostream& os, const Tensor& t) { constexpr auto MAX_IDX = static_cast(10); if(t.dims.nDims()==2){ - for(tensorDim_t i=0; i& idx, const } const auto lastIdx = idx.size()-1; - tensorSize_t offsetFactor = dims.getItem(lastIdx); + tensorSize_t offsetFactor = dims.get(lastIdx); tensorSize_t res = idx[lastIdx]; for(int i=lastIdx-1; i>=0; i--){ res += idx[i] * offsetFactor; - offsetFactor *= dims.getItem(i); + offsetFactor *= dims.get(i); } return res; @@ -864,7 +925,7 @@ tensorSize_t Tensor::getDimOffset(const tensorDim_t dim, const Dimension& dims) tensorSize_t res = 1; // minimum possible dimsize for(size_t idx = dims.nDims()-1; idx>dim; idx--){ - res *= dims.getItem(idx); + res *= dims.get(idx); } assert(res!=0); @@ -881,7 +942,7 @@ tensorSize_t Tensor::getDimOffset(const int dim, const Dimension& dims) { /** * @brief No explanation needed. */ -ftype Tensor::getItem(const std::vector& idx) const { +ftype Tensor::get(const std::vector& idx) const { return (*values)[computeLinearIdx(idx, dims)]; } @@ -889,26 +950,34 @@ ftype Tensor::getItem(const std::vector& idx) const { * @brief Special getter, indexes the contained underlying array linearly. * Can lead to unexpected results in multidimensional tensors. */ -ftype Tensor::getItem(tensorSize_t idx) const { +ftype Tensor::get(tensorSize_t idx) const { + return (*this)[idx]; +} + +/** + * @brief For convenience. 
+ */ +ftype Tensor::operator[](tensorSize_t idx) const { return (*values)[idx]; } -ftype Tensor::getItem(tensorDim_t idx0, tensorDim_t idx1) const { - return getItem({idx0, idx1}); + +ftype Tensor::get(tensorDim_t idx0, tensorDim_t idx1) const { + return get({idx0, idx1}); } -ftype Tensor::getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) const { - return getItem({idx0, idx1, idx2}); +ftype Tensor::get(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) const { + return get({idx0, idx1, idx2}); } -ftype Tensor::getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) const { - return getItem({idx0, idx1, idx2, idx3}); +ftype Tensor::get(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) const { + return get({idx0, idx1, idx2, idx3}); } /** * @brief No explanation needed. */ -void Tensor::setItem(ftype item, const std::vector& idx) { +void Tensor::set(ftype item, const std::vector& idx) { (*values)[computeLinearIdx(idx, dims)] = item; } @@ -916,18 +985,18 @@ void Tensor::setItem(ftype item, const std::vector& idx) { * @brief Special setter, indexes the contained underlying array linearly. * Can lead to unexpected results in multidimensional tensors. 
*/ -void Tensor::setItem(ftype item, tensorDim_t idx) { +void Tensor::set(ftype item, tensorDim_t idx) { (*values)[idx] = item; } -void Tensor::setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1) { - setItem(item, {idx0, idx1}); +void Tensor::set(ftype item, tensorDim_t idx0, tensorDim_t idx1) { + set(item, {idx0, idx1}); } -void Tensor::setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) { - setItem(item, {idx0, idx1, idx2}); +void Tensor::set(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) { + set(item, {idx0, idx1, idx2}); } -void Tensor::setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) { - setItem(item, {idx0, idx1, idx2, idx3}); +void Tensor::set(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) { + set(item, {idx0, idx1, idx2, idx3}); } \ No newline at end of file diff --git a/src/backend/data_modeling/tensor.h b/src/backend/data_modeling/tensor.h index 9c1384b..d8a59fc 100644 --- a/src/backend/data_modeling/tensor.h +++ b/src/backend/data_modeling/tensor.h @@ -12,6 +12,7 @@ #pragma once #include "dim_type.h" +#include "device.h" #include "computational_graph/topological_sort.h" #include "computational_graph/graph_node.h" @@ -19,38 +20,22 @@ #include "utility/initializers.h" #include -#include +#include #include #include +#include #include // break circular dependency -namespace graph { +namespace cgraph { class GraphNode; class TopologicalSort; } -enum class Device { - CPU, - CUDA -}; - -constexpr const char* DeviceToString(Device d) { - switch(d){ - case Device::CPU: - return "CPU"; - case Device::CUDA: - return "CUDA"; - } - - std::__throw_invalid_argument("Unknown device encountered"); - return ""; // suppress -} - class Tensor final : public std::enable_shared_from_this { - friend class graph::TopologicalSort; + friend class cgraph::TopologicalSort; private: /** @@ -85,8 +70,8 @@ class Tensor final : public std::enable_shared_from_this 
{ ftype& operator[](const tensorSize_t idx); ftype operator[](const tensorSize_t idx) const; - void setItem(ftype v, tensorSize_t idx); - ftype getItem(tensorSize_t idx); + void set(ftype v, tensorSize_t idx); + ftype get(tensorSize_t idx); tensorSize_t getSize() const noexcept; @@ -110,7 +95,9 @@ class Tensor final : public std::enable_shared_from_this { void setDevice(const Device d) noexcept; Device getDevice() const noexcept; - static void copyValues(tensorValues_t& target, const tensorValues_t& origin); + void copyValues(tensorValues_t& target) const; + void copyValues(tensorValues_t& target, tensorSize_t low, tensorSize_t high, tensorSize_t targetOffset) const; + void copyValues(tensorValues_t& target, std::span indices, const tensorSize_t sizeOfDim) const; static void setDefaultDevice(const Device d) noexcept; static Device getDefaultDevice() noexcept; @@ -121,13 +108,15 @@ class Tensor final : public std::enable_shared_from_this { bool requiresGrad = false; std::shared_ptr grads = nullptr; // gradients - std::shared_ptr cgNode = nullptr; + std::shared_ptr cgNode = nullptr; static Tensor multiplyScalar(const Tensor& scalar, const Tensor& other) noexcept; - static void matMul2DCpu(Tensor& res, const Tensor& left, const Tensor& right, const tensorSize_t resOffset, - const tensorSize_t leftOffset, const tensorSize_t rightOffset); - Tensor matMulImpl(const Tensor& left, const Tensor& right) const; + static Tensor matMulImpl(const Tensor& left, const Tensor& right); + static void matMul2DCpu(Tensor& res, const Tensor& left, const Tensor& right, + const tensorSize_t resOffset, const tensorSize_t leftOffset, + const tensorSize_t rightOffset); + void transposeImpl2D(Tensor& target, const int dim1, const int dim2) const noexcept; void transposeImpl(Tensor& target, const int dim1, const int dim2) const noexcept; @@ -165,7 +154,7 @@ class Tensor final : public std::enable_shared_from_this { explicit Tensor(const std::vector& dims, const std::vector& initValues, Device 
d, bool requiresGrad=false) : Tensor{dims, d, requiresGrad} { for(tensorSize_t i=0; isetItem(initValues[i], i); + values->set(initValues[i], i); } } @@ -187,8 +176,8 @@ class Tensor final : public std::enable_shared_from_this { Tensor(Tensor&& other) noexcept; Tensor& operator=(Tensor&& other) noexcept; - void reset(const ftype x); - void reset(const utility::InitClass ic); + void reset(const ftype x) noexcept; + void reset(const std::shared_ptr init) noexcept; const Dimension& getDims() const noexcept; tensorSize_t getSize() const noexcept; @@ -217,9 +206,12 @@ class Tensor final : public std::enable_shared_from_this { friend Tensor operator+(ftype scalar, const Tensor& tensor); void backward(); - + + std::shared_ptr getGrads() const; + void setGrads(std::shared_ptr grads) noexcept { + this->grads = std::move(grads); + } bool hasGrads() const noexcept { return grads!=nullptr; } - std::shared_ptr getGrads() const; void transposeThis() noexcept; void transposeThis(int dim1, int dim2) noexcept; @@ -232,35 +224,30 @@ class Tensor final : public std::enable_shared_from_this { friend std::ostream& operator<<(std::ostream& os, const Tensor& t) noexcept; // for convenience we provide some simple getters - ftype getItem(tensorSize_t idx) const; - ftype getItem(tensorDim_t idx0, tensorDim_t idx1) const; - ftype getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) const; - ftype getItem(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) const; + ftype get(tensorSize_t idx) const; + ftype get(tensorDim_t idx0, tensorDim_t idx1) const; + ftype get(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2) const; + ftype get(tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3) const; - ftype getItem(const std::vector& idx) const; + // non-const version of operator[] does not exist because of CUDA + ftype operator[](tensorSize_t idx) const; + + ftype get(const std::vector& idx) const; // for convenience we provide some simple setters - 
void setItem(ftype item, tensorDim_t idx); - void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1); - void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2); - void setItem(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3); - void setItem(ftype item, const std::vector& idx); + void set(ftype item, tensorDim_t idx); + void set(ftype item, tensorDim_t idx0, tensorDim_t idx1); + void set(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2); + void set(ftype item, tensorDim_t idx0, tensorDim_t idx1, tensorDim_t idx2, tensorDim_t idx3); + void set(ftype item, const std::vector& idx); void setDevice(const Device d) noexcept; Device getDevice() const noexcept; bool getRequiresGrad() const noexcept { return requiresGrad; } - void setRequiresGrad(const bool requiresGrad) noexcept { - this->requiresGrad=requiresGrad; - if(!requiresGrad && cgNode){ - cgNode = nullptr; - } - if(!requiresGrad && grads){ - grads = nullptr; - } - } + void setRequiresGrad(const bool requiresGrad) noexcept { this->requiresGrad=requiresGrad; } - void setCgNode(std::shared_ptr node) noexcept { + void setCgNode(std::shared_ptr node) noexcept { cgNode = std::move(node); requiresGrad = true; } @@ -276,6 +263,9 @@ class Tensor final : public std::enable_shared_from_this { } } + Tensor getSlice(tensorSize_t low, tensorSize_t high) const; + Tensor getSlice(std::span indices) const; + // these two should not be exposed to the python interface static void setDefaultDevice(const Device d) noexcept; static Device getDefaultDevice() noexcept; diff --git a/src/backend/data_modeling/tensor_functions.cpp b/src/backend/data_modeling/tensor_functions.cpp index 3ac032e..5f06d2d 100644 --- a/src/backend/data_modeling/tensor_functions.cpp +++ b/src/backend/data_modeling/tensor_functions.cpp @@ -33,27 +33,29 @@ Tensor TensorFunctions::Ones(vector dims, const bool requiresGrad) return Ones(std::move(dims), Tensor::getDefaultDevice(), 
requiresGrad); } -Tensor TensorFunctions::Gaussian(vector dims, Device d, const bool requiresGrad) { +Tensor TensorFunctions::Gaussian(vector dims, const Device d, + const ftype stddev, const bool requiresGrad) { auto res = Tensor(std::move(dims), d, requiresGrad); - res.reset(utility::InitClass::Gaussian); + res.reset(std::make_shared(stddev)); return res; } -Tensor TensorFunctions::Gaussian(vector dims, const bool requiresGrad) { - return Gaussian(std::move(dims), Tensor::getDefaultDevice(), requiresGrad); +Tensor TensorFunctions::Gaussian(vector dims, const ftype stddev, + const bool requiresGrad) { + return Gaussian(std::move(dims), Tensor::getDefaultDevice(), stddev, requiresGrad); } // Tensor manipulation -void TensorFunctions::ToZeros(Tensor& t) { +void TensorFunctions::ToZeros(Tensor& t) noexcept { t.reset(0); } -void TensorFunctions::ToOnes(Tensor& t) { +void TensorFunctions::ToOnes(Tensor& t) noexcept { t.reset(1); } -void TensorFunctions::ToGaussian(Tensor& t) { - t.reset(utility::InitClass::Gaussian); +void TensorFunctions::ToGaussian(Tensor& t, const ftype stddev) { + t.reset(std::make_shared(stddev)); } shared_ptr TensorFunctions::makeSharedTensor(const vector& dims, bool requiresGrad){ @@ -75,4 +77,38 @@ shared_ptr TensorFunctions::makeSharedTensor(const vector& Device d, bool requiresGrad){ return make_shared(dims, initValues, d, requiresGrad); +} + +/************************************************************************************ + ************************************ Arithmetics *********************************** + ***********************************************************************************/ + + /** + * @brief Sums over the dimensions. If input is (b-size, dim1, dim2), and + * input dim-parameter is 1, then output will be (b-size, dim2). If + * input dim-parameter is 0, then output will be (dim1, dim2). 
+ * Input dim must be smaller then t.dims.nDims()-1 + */ +Tensor TensorFunctions::SumOverDims(const Tensor& t, tensorDim_t dim) { + if(dim>=t.getDims().nDims()-1){ + __throw_invalid_argument("Dim parameter must be smaller than number of dims, but was " + dim); + } + + auto resDims = t.getDims().collapseDimension(dim); + Tensor res = Zeros(resDims.toVector(), t.getDevice(), t.getRequiresGrad()); // inefficiency toVector + + tensorSize_t stride = 1; + for(tensorDim_t i=dim+1; i dims, Device d, const bool requiresGrad=false); Tensor Ones(std::vector dims, const bool requiresGrad=false); - Tensor Gaussian(std::vector dims, Device d, const bool requiresGrad=false); - Tensor Gaussian(std::vector dims, const bool requiresGrad=false); + Tensor Gaussian(std::vector dims, Device d, ftype stddev, const bool requiresGrad=false); + Tensor Gaussian(std::vector dims, ftype stddev=1, const bool requiresGrad=false); std::shared_ptr makeSharedTensor(const std::vector& dims, bool requiresGrad=false); @@ -48,7 +48,10 @@ namespace TensorFunctions { // class name acts as namespace for us Device d, bool requiresGrad=false); // Tensor manipulation - void ToZeros(Tensor& t); - void ToOnes(Tensor& t); - void ToGaussian(Tensor& t); + void ToZeros(Tensor& t) noexcept; + void ToOnes(Tensor& t) noexcept; + void ToGaussian(Tensor& t, ftype stddev); + + // Arithmetics + Tensor SumOverDims(const Tensor& t, tensorDim_t dim=0); // default 0 for batch-size } \ No newline at end of file diff --git a/src/backend/layers/activation_functions/activation_function_base.cpp b/src/backend/layers/activation_functions/activation_function_base.cpp deleted file mode 100644 index 8e482c3..0000000 --- a/src/backend/layers/activation_functions/activation_function_base.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/** - * @file activation_function_base.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-02 - * - * @copyright Copyright (c) 2026 - * - */ - -#include 
"activation_function_base.h" - -using namespace activation; - -Tensor ActivationFunctionBase::forward(Tensor& t) const noexcept { - return (*this)(t); -} \ No newline at end of file diff --git a/src/backend/layers/activation_functions/activation_function_base.h b/src/backend/layers/activation_functions/activation_function_base.h deleted file mode 100644 index b0370c6..0000000 --- a/src/backend/layers/activation_functions/activation_function_base.h +++ /dev/null @@ -1,26 +0,0 @@ -/** - * @file function_base.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-01 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "tensor.h" - -#include - -namespace activation { - class ActivationFunctionBase { - public: - virtual Tensor operator()(Tensor& t) const noexcept = 0; - Tensor forward(Tensor& t) const noexcept; - - virtual Tensor gradient(const Tensor& t) noexcept = 0; - }; -} diff --git a/src/backend/layers/activation_functions/leaky_relu.h b/src/backend/layers/activation_functions/leaky_relu.h deleted file mode 100644 index fdcacc4..0000000 --- a/src/backend/layers/activation_functions/leaky_relu.h +++ /dev/null @@ -1,23 +0,0 @@ -/** - * @file leaky_relu.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-01 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "activation_function_base.h" -#include "tensor.h" - -namespace activation { - class LeakyReLU final : public ActivationFunctionBase { - public: - Tensor operator()(Tensor& t) const noexcept override; - Tensor gradient(const Tensor& t) noexcept override; - }; -} diff --git a/src/backend/layers/activation_functions/relu.cpp b/src/backend/layers/activation_functions/relu.cpp deleted file mode 100644 index fd2b42d..0000000 --- a/src/backend/layers/activation_functions/relu.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/** - * @file relu.cpp - * @author Robert Baumgartner 
(r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-01 - * - * @copyright Copyright (c) 2026 - * - */ - -#include "relu.h" -#include "global_params.h" - -using namespace activation; - -Tensor ReLU::operator()(Tensor& t) const noexcept { - for(tensorSize_t i=0; i target){ - t[i] = 0; - } - } - return t; -} - -Tensor ReLU::gradient(const Tensor& t) noexcept { -/* for(tensorSize_t i=0; i target){ - t[i] = 0; - } - } - return t; */ -} \ No newline at end of file diff --git a/src/backend/layers/activation_functions/relu.h b/src/backend/layers/activation_functions/relu.h deleted file mode 100644 index 4c223f0..0000000 --- a/src/backend/layers/activation_functions/relu.h +++ /dev/null @@ -1,23 +0,0 @@ -/** - * @file relu.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-01 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "activation_function_base.h" -#include "tensor.h" - -namespace activation { - class ReLU final : public ActivationFunctionBase { - public: - Tensor operator()(Tensor& t) const noexcept override; - Tensor gradient(const Tensor& t) noexcept override; - }; -} diff --git a/src/backend/layers/activation_functions/sigmoid.h b/src/backend/layers/activation_functions/sigmoid.h deleted file mode 100644 index e69de29..0000000 diff --git a/src/backend/layers/ff_layer.cpp b/src/backend/layers/ff_layer.cpp deleted file mode 100644 index 11cc82b..0000000 --- a/src/backend/layers/ff_layer.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/** - * @file ff_layer.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-12-07 - * - * @copyright Copyright (c) 2025 - * - */ - -#include "ff_layer.h" - -#include -#include - -using namespace std; -using namespace layers; - -FfLayer::FfLayer(const tensorDim_t in_size, const tensorDim_t out_size) { - //weights.emplace(Device::CPU, in_size, out_size); - 
//weights->reset(utility::InitClass::Gaussian); -} - -Tensor FfLayer::forward(const Tensor& input) const { - return *weights * input; -} - -//ftype* FfLayer::backward(ftype* input) { - -//} \ No newline at end of file diff --git a/src/backend/layers/ff_layer.h b/src/backend/layers/ff_layer.h deleted file mode 100644 index 1c2ea06..0000000 --- a/src/backend/layers/ff_layer.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * @file ff_layer.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-12-07 - * - * @copyright Copyright (c) 2025 - * - */ - -#pragma once - -#include "layer_base.h" -#include "utility/initializers.h" - -#include - -namespace layers { - class FfLayer : public LayerBase { - protected: - // memoization - // TODO: necessary? - //mutable std::optional v1; - - public: - FfLayer(tensorDim_t in_size, tensorDim_t out_size); - - Tensor forward(const Tensor& input) const override; - //ftype* backward(ftype* input) override; - }; -} diff --git a/src/backend/layers/layer_base.cpp b/src/backend/layers/layer_base.cpp deleted file mode 100644 index 7abcb0b..0000000 --- a/src/backend/layers/layer_base.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/** - * @file layer_base.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-01-25 - * - * @copyright Copyright (c) 2026 - * - */ - -#include "layer_base.h" - -#include - -using namespace std; -using namespace layers; - -ftype LayerBase::getItem(vector&&idx) const { - assert(weights); - return weights.value().getItem(std::move(idx)); -} - -void LayerBase::setItem(ftype item, vector&& idx) { - assert(weights); - weights.value().setItem(item, std::move(idx)); -} - -void LayerBase::print(ostream& os) const noexcept { - assert(weights); - os << weights.value(); -} - -ostream& operator<<(ostream& os, const LayerBase& l) noexcept { - l.print(os); - return os; -} \ No newline at end of file diff --git a/src/backend/layers/layer_base.h 
b/src/backend/layers/layer_base.h deleted file mode 100644 index 2803540..0000000 --- a/src/backend/layers/layer_base.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * @file layer_base.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-12-07 - * - * @copyright Copyright (c) 2025 - * - */ - -#pragma once - -#include "data_modeling/tensor.h" - -#include "utility/global_params.h" - -#include -#include - -namespace layers { - /** - * The base class for all the layers that we have. Not instantiable. - */ - class LayerBase { - protected: - std::optional weights = std::nullopt; - std::optional bias = std::nullopt; - - public: - LayerBase() = default; - virtual ~LayerBase() noexcept = default; - - virtual Tensor forward(const Tensor& input) const = 0; - //virtual ftype* backward(ftype* input) = 0; - - // weights should always exist, never nullopt outside of c'tor - const Dimension& getDims() const noexcept { - return weights.value().getDims(); - } - - ftype getItem(std::vector&& idx) const; - void setItem(ftype item, std::vector&& idx); - - - virtual void print(std::ostream& os) const noexcept; - friend std::ostream& operator<<(std::ostream& os, const LayerBase& t) noexcept; - }; -} \ No newline at end of file diff --git a/src/backend/module/activation_functions/leaky_relu.cpp b/src/backend/module/activation_functions/leaky_relu.cpp new file mode 100644 index 0000000..687e2bf --- /dev/null +++ b/src/backend/module/activation_functions/leaky_relu.cpp @@ -0,0 +1,40 @@ +/** + * @file leaky_relu.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "leaky_relu.h" +#include "computational_graph/activation_functions/leaky_relu_node.h" + +using namespace std; +using namespace module; + +Tensor LeakyReLu::operator()(const Tensor& t) const { + auto res = t.createDeepCopy(); + + for(tensorSize_t i=0; i 
LeakyReLu::operator()(const shared_ptr& t) const { + auto res = make_shared((*this)(*t)); + + if(t->getRequiresGrad()){ + res->setCgNode(make_shared(t, eps)); + assert(res->getRequiresGrad()); + } + + return res; +} \ No newline at end of file diff --git a/src/backend/module/activation_functions/leaky_relu.h b/src/backend/module/activation_functions/leaky_relu.h new file mode 100644 index 0000000..94f0c3d --- /dev/null +++ b/src/backend/module/activation_functions/leaky_relu.h @@ -0,0 +1,32 @@ +/** + * @file leaky_relu.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-01 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "module/module_base.h" + +namespace module { + class LeakyReLu final : public ModuleBase { + private: + const ftype eps; + + public: + LeakyReLu(ftype eps=0.01) : eps{eps} + { } + + Tensor operator()(const Tensor& t) const override; + std::shared_ptr operator()(const std::shared_ptr& t) const override; + + void print(std::ostream& os) const noexcept override { + os << "\nLeakyReLU\neps: " << eps; + } + }; +} diff --git a/src/backend/module/activation_functions/relu.cpp b/src/backend/module/activation_functions/relu.cpp new file mode 100644 index 0000000..7935304 --- /dev/null +++ b/src/backend/module/activation_functions/relu.cpp @@ -0,0 +1,40 @@ +/** + * @file relu.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-01 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "relu.h" +#include "computational_graph/activation_functions/relu_node.h" + +using namespace std; +using namespace module; + +Tensor ReLu::operator()(const Tensor& t) const { + auto res = t.createDeepCopy(); + + for(tensorSize_t i=0; i ReLu::operator()(const shared_ptr& t) const { + auto res = make_shared((*this)(*t)); + + if(t->getRequiresGrad()){ + res->setCgNode(make_shared(t)); + assert(res->getRequiresGrad()); + } + + return res; +} 
diff --git a/src/backend/module/activation_functions/relu.h b/src/backend/module/activation_functions/relu.h new file mode 100644 index 0000000..05268e0 --- /dev/null +++ b/src/backend/module/activation_functions/relu.h @@ -0,0 +1,24 @@ +/** + * @file relu.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-01 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "module/module_base.h" + +namespace module { + class ReLu final : public ModuleBase { + public: + ReLu() = default; + + Tensor operator()(const Tensor& t) const override; + std::shared_ptr operator()(const std::shared_ptr& t) const override; + }; +} diff --git a/src/backend/module/activation_functions/sigmoid.cpp b/src/backend/module/activation_functions/sigmoid.cpp new file mode 100644 index 0000000..765b44a --- /dev/null +++ b/src/backend/module/activation_functions/sigmoid.cpp @@ -0,0 +1,52 @@ +/** + * @file sigmoid.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "sigmoid.h" + +#include "computational_graph/activation_functions/sigmoid_node.h" + +#include + +using namespace std; +using namespace module; + +/** + * @brief Sigmoid activation function. 
+ */ +Tensor Sigmoid::operator()(const Tensor& t) const { + auto res = t.createEmptyCopy(); + + constexpr ftype one = 1.0; + auto compute = [](ftype x){ + if(x>=0){ + return one / (one + exp(-x)); + } + auto e = exp(x); + return e / (one + e); + }; + + for(tensorSize_t i=0; i Sigmoid::operator()(const shared_ptr& t) const { + auto res = make_shared((*this)(*t)); + + if(t->getRequiresGrad()){ + res->setCgNode(make_shared(t, res)); + assert(res->getRequiresGrad()); + } + + return res; +} diff --git a/src/backend/module/activation_functions/sigmoid.h b/src/backend/module/activation_functions/sigmoid.h new file mode 100644 index 0000000..1cdf8d7 --- /dev/null +++ b/src/backend/module/activation_functions/sigmoid.h @@ -0,0 +1,22 @@ +/** + * @file sigmoid.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "module/module_base.h" + +namespace module { + class Sigmoid final : public ModuleBase { + public: + Tensor operator()(const Tensor& t) const override; + std::shared_ptr operator()(const std::shared_ptr& t) const override; + }; +} diff --git a/src/backend/module/activation_functions/softmax.cpp b/src/backend/module/activation_functions/softmax.cpp new file mode 100644 index 0000000..a001a69 --- /dev/null +++ b/src/backend/module/activation_functions/softmax.cpp @@ -0,0 +1,81 @@ +/** + * @file softmax.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "softmax.h" + +#include "computational_graph/activation_functions/softmax_node.h" + +#include + +using namespace std; +using namespace module; + +/** + * @brief Softmax over last dimension. 
Expects shape + * (dim1, dim2, ..., n_classes) + * @return Tensor of shape (dim1, dim2, ..., n_classes) [== input.shape] + */ +Tensor Softmax::operator()(const Tensor& t) const { + if(t.getDims().nDims()<2){ + __throw_invalid_argument("Softmax expects input shape of minimum two dimensions"); + } + + const auto nRows = t.getDims()[-2]; + const auto nCols = t.getDims()[-1]; + + // pre-compute exponents + Tensor tmp(t.getDims(), t.getDevice(), false); + for(tensorDim_t i=0; i::infinity(); + for(tensorDim_t j=0; j Softmax::operator()(const shared_ptr& t) const { + auto res = make_shared((*this)(*t)); + + if(t->getRequiresGrad()){ + res->setCgNode(make_shared(t, res)); + assert(res->getRequiresGrad()); + } + + return res; +} diff --git a/src/backend/module/activation_functions/softmax.h b/src/backend/module/activation_functions/softmax.h new file mode 100644 index 0000000..d3c4ade --- /dev/null +++ b/src/backend/module/activation_functions/softmax.h @@ -0,0 +1,22 @@ +/** + * @file softmax.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "module/module_base.h" + +namespace module { + class Softmax final : public ModuleBase { + public: + Tensor operator()(const Tensor& t) const override; + std::shared_ptr operator()(const std::shared_ptr& t) const override; + }; +} diff --git a/src/backend/module/layers/ff_layer.cpp b/src/backend/module/layers/ff_layer.cpp new file mode 100644 index 0000000..82fed69 --- /dev/null +++ b/src/backend/module/layers/ff_layer.cpp @@ -0,0 +1,84 @@ +/** + * @file ff_layer.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-12-07 + * + * @copyright Copyright (c) 2025 + * + */ + +#include "ff_layer.h" +#include "data_modeling/tensor_functions.h" + +#include "computational_graph/tensor_ops/graph_creation.h" + +#include +#include + +using namespace std; +using 
namespace module; +using namespace utility; + +FfLayer::FfLayer(tensorDim_t inSize, tensorDim_t outSize, bool useBias, bool requiresGrad, shared_ptr init) + : FfLayer(inSize, outSize, Tensor::getDefaultDevice(), useBias, requiresGrad, init) {} + +/** + * @brief Construct a new Ff Layer:: Ff Layer object + * Assumption for dims: (in-size, out-size) + * @param dims Dimensions, see above. + * @param d The device. + * @param useBias Use a bias if true. Bias will receiver shape (n_rows) + * @param requiresGrad If true train this layer. + */ +FfLayer::FfLayer(tensorDim_t inSize, tensorDim_t outSize, Device d, + bool useBias, bool requiresGrad, shared_ptr init) + : useBias{useBias}, requiresGrad{requiresGrad} +{ + if(!init){ + init = make_shared(inSize, outSize); + } + + weights = make_shared(Dimension({inSize, outSize}), d, requiresGrad); + weights->reset(init); + + if(useBias){ + bias = make_shared(vector{outSize}, d, requiresGrad); + TensorFunctions::ToZeros(*bias); + } +} + +/** + * @brief Normal forward function. Does not build computational graph. + * + * Assumption for input: (b-size, ..., dim1, in-size) + */ +Tensor FfLayer::operator()(const Tensor& input) const { + auto res = input.matmul(*weights); + + if(useBias){ + res = res + *bias; + } + + return res; +} + +/** + * @brief Like overload, but creates computational graph. 
+ */ +std::shared_ptr FfLayer::operator()(const std::shared_ptr& input) const { + auto res = cgraph::matmul(input, weights); + if(useBias){ + res = cgraph::add(res, bias); + } + + return res; +} + +void FfLayer::print(ostream& os) const noexcept { + os << "\nFfLayer\nWeights:\n" << *weights; + if(bias){ + os << "\nBias:\n" << *bias; + } +} \ No newline at end of file diff --git a/src/backend/module/layers/ff_layer.h b/src/backend/module/layers/ff_layer.h new file mode 100644 index 0000000..8c58dc2 --- /dev/null +++ b/src/backend/module/layers/ff_layer.h @@ -0,0 +1,55 @@ +/** + * @file ff_layer.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-12-07 + * + * @copyright Copyright (c) 2025 + * + */ + +#pragma once + +#include "module/module_base.h" +#include "utility/initializers.h" + +#include + +namespace module { + class FfLayer : public ModuleBase { + bool requiresGrad = false; + bool useBias = false; + + std::shared_ptr weights = nullptr; + std::shared_ptr bias = nullptr; + + public: + FfLayer(tensorDim_t inSize, tensorDim_t outSize, + bool useBias=true, bool requiresGrad=false, std::shared_ptr init=nullptr); + + FfLayer(tensorDim_t inSize, tensorDim_t outSize, Device d, + bool useBias=true, bool requiresGrad=false, std::shared_ptr init=nullptr); + + Tensor operator()(const Tensor& input) const override; + std::shared_ptr operator()(const std::shared_ptr& input) const override; + + const Dimension& getDims() const { + assert(weights); + return weights->getDims(); + } + + auto getWeights() const noexcept { return weights; } + auto getBias() const noexcept { return bias; } + + bool hasWeights() const { + return weights != nullptr; + } + + std::vector< std::shared_ptr > parameters() const override { + return {weights, bias}; + } + + void print(std::ostream& os) const noexcept override; + }; +} diff --git a/src/backend/module/module_base.cpp b/src/backend/module/module_base.cpp new file mode 100644 index 
0000000..951f96c --- /dev/null +++ b/src/backend/module/module_base.cpp @@ -0,0 +1,21 @@ +/** + * @file module.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-13 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "module/module_base.h" + +#include + +using namespace std; + +ostream& module::operator<<(ostream& os, const module::ModuleBase& l) noexcept { + l.print(os); // calling vtable + return os; +} \ No newline at end of file diff --git a/src/backend/module/module_base.h b/src/backend/module/module_base.h new file mode 100644 index 0000000..28247ab --- /dev/null +++ b/src/backend/module/module_base.h @@ -0,0 +1,66 @@ +/** + * @file module_base.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-13 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "data_modeling/tensor.h" +#include "utility/global_params.h" + +#include +#include +#include + +#include + +// if GCC or Clang +#ifdef __GNUC__ +#include +#endif // __GNUC__ + +namespace module { + /** + * The base class for all the layers that we have. Not instantiable. 
+ */ + class ModuleBase { + public: + ModuleBase() = default; + + ModuleBase(const ModuleBase& other) = delete; + ModuleBase& operator=(const ModuleBase& other) = delete; + + ModuleBase(ModuleBase&& other) noexcept = default; + ModuleBase& operator=(ModuleBase&& other) noexcept = default; + + ~ModuleBase() noexcept = default; + + // for inference -> no graph creation + virtual Tensor operator()(const Tensor& input) const = 0; + // for training -> creates graph + virtual std::shared_ptr operator()(const std::shared_ptr& input) const = 0; + + virtual std::vector< std::shared_ptr > parameters() const { return {}; } + + virtual void print(std::ostream& os) const noexcept { + os << "\n"; + #ifdef __GNUC__ + // demangle name on gcc and clang + int status; + char* demangled = abi::__cxa_demangle(typeid(*this).name(), nullptr, nullptr, &status); + os << (status == 0 ? demangled : typeid(*this).name()); + std::free(demangled); + #else + os << typeid(*this).name(); + #endif + }; + + friend std::ostream& operator<<(std::ostream& os, const ModuleBase& t) noexcept; + }; +} \ No newline at end of file diff --git a/src/backend/module/networks/sequential.cpp b/src/backend/module/networks/sequential.cpp new file mode 100644 index 0000000..e807e51 --- /dev/null +++ b/src/backend/module/networks/sequential.cpp @@ -0,0 +1,67 @@ +/** + * @file sequential.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-12-07 + * + * @copyright Copyright (c) 2025 + * + */ + +#include "sequential.h" + +using namespace std; +using namespace module; + +Tensor Sequential::operator()(const Tensor& input) const { + if(layers.size()==0){ + __throw_invalid_argument("Network empy, cannot be called."); + } + + auto x = layers[0]->operator()(input); + for(int i=1; ioperator()(x); + } + + return x; +} + +shared_ptr Sequential::operator()(const shared_ptr& input) const { + if(layers.size()==0){ + __throw_invalid_argument("Network empy, cannot be called."); + 
} + + auto x = layers[0]->operator()(input); + for(int i=1; ioperator()(x); + } + + return x; +} + +vector> Sequential::parameters() const { + vector> res; + + for(const auto& layer: layers) { + auto p = layer->parameters(); + for(auto& pp: p){ + if(pp){ + res.push_back(std::move(pp)); + } + } + } + + return res; +} + +void Sequential::append(shared_ptr l) { + layers.push_back(move(l)); +} + +void Sequential::print(std::ostream& os) const noexcept { + os << "\nSequential"; + for(const auto& l: layers){ + os << *l; + } +} \ No newline at end of file diff --git a/src/backend/module/networks/sequential.h b/src/backend/module/networks/sequential.h new file mode 100644 index 0000000..cf13417 --- /dev/null +++ b/src/backend/module/networks/sequential.h @@ -0,0 +1,44 @@ +/** + * @file sequential.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-12-07 + * + * @copyright Copyright (c) 2025 + * + */ + +#pragma once + +#include "module/module_base.h" + +#include +#include + +namespace module { + class Sequential : public ModuleBase { + protected: + std::vector< std::shared_ptr > layers; + + public: + Sequential() = default; + + Sequential(const Sequential& other) = delete; + Sequential& operator=(const Sequential& other) = delete; + + Sequential(Sequential&& other) noexcept = default; + Sequential& operator=(Sequential&& other) noexcept = default; + + ~Sequential() noexcept = default; + + Tensor operator()(const Tensor& input) const override; + std::shared_ptr operator()(const std::shared_ptr& input) const override; + + std::vector> parameters() const override; + + void append(std::shared_ptr l); + + void print(std::ostream& os) const noexcept override; + }; +} diff --git a/src/backend/networks/sequential.cpp b/src/backend/networks/sequential.cpp deleted file mode 100644 index 08617e5..0000000 --- a/src/backend/networks/sequential.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/** - * @file sequential.cpp - * @author Robert 
Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-12-07 - * - * @copyright Copyright (c) 2025 - * - */ - -#include "sequential.h" - -using namespace std; -using namespace layers; - -/** - * @brief Returns true if dimensions valid, else false. - * Ensures consistency along network. - */ -bool SequentialNetwork::assertDims(const LayerBase& layer) const noexcept { - if(layers.size() == 0) - return true; - - return layers.at(layers.size()-1).getDims() == layer.getDims(); -} - -Tensor SequentialNetwork::forward(const Tensor& input) const { - if(input.getDims().getItem(1) != layers.at(0).getDims().getItem(0)){ - // TODO: show meaningful message rather than exception - __throw_invalid_argument("Not implemented yet. Dimensions don't match"); - } - - if(layers.size()==0){ - // TODO: show meaningful message rather than exception - __throw_invalid_argument("Network empy, cannot be called."); - } - - Tensor x = layers.at(0).forward(input); - for(int i=1; i -#include -#include - -class SequentialNetwork { - protected: - std::vector layers; - bool assertDims(const layers::LayerBase& layer) const noexcept; - - template - requires (std::derived_from< std::remove_cvref_t, layers::LayerBase >) - void addLayer(T&& layer) { - if(!assertDims(layer)){ - // TODO: show warning that the dims don't match - return; - } - layers.push_back(std::forward(layer)); - } - - public: - SequentialNetwork() = default; - - Tensor forward(const Tensor& input) const; -}; - -/*template -void SequentialNetwork::addLayer(LayerBase&& layer) noexcept { - layers.push_back(std::forward(layer)); -}*/ \ No newline at end of file diff --git a/src/backend/system/sys_functions.cpp b/src/backend/system/sys_functions.cpp index 02a501f..d653663 100644 --- a/src/backend/system/sys_functions.cpp +++ b/src/backend/system/sys_functions.cpp @@ -10,13 +10,20 @@ */ #include "sys_functions.h" +#include "data_modeling/tensor.h" -using namespace global; +#include "utility/initializers.h" -void 
setDevice(Device d) noexcept { +using namespace sys; + +void sys::setDevice(Device d) noexcept { Tensor::setDefaultDevice(d); } -Device getDevice() noexcept { +Device sys::getDevice() noexcept { return Tensor::getDefaultDevice(); +} + +void sys::setRandomSeed(const unsigned int s) noexcept { + utility::InitializerBase::setSeed(s); } \ No newline at end of file diff --git a/src/backend/system/sys_functions.h b/src/backend/system/sys_functions.h index ad58358..b3a0f63 100644 --- a/src/backend/system/sys_functions.h +++ b/src/backend/system/sys_functions.h @@ -12,9 +12,11 @@ #pragma once -#include "data_modeling/tensor.h" +#include "data_modeling/device.h" -namespace global { +namespace sys { void setDevice(Device d) noexcept; Device getDevice() noexcept; + + void setRandomSeed(unsigned int s) noexcept; } \ No newline at end of file diff --git a/src/backend/training/loss_functions/bce_loss.cpp b/src/backend/training/loss_functions/bce_loss.cpp new file mode 100644 index 0000000..a6df909 --- /dev/null +++ b/src/backend/training/loss_functions/bce_loss.cpp @@ -0,0 +1,52 @@ +/** + * @file bce_loss.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "bce_loss.h" + +#include "computational_graph/loss_functions/bce_node.h" + +#include + +using namespace std; +using namespace train; + +/** + * @brief Expected shapes: (batchsize) or (batchsize, 1) + * @return Tensor of shape (1) + */ +shared_ptr BceLoss::operator()(const shared_ptr y, const shared_ptr ypred) const { + if(!ypred->getRequiresGrad()) { + __throw_invalid_argument("ypred must have gradient enabled"); + } + else if(y->getDevice() != ypred->getDevice()){ + __throw_invalid_argument("y and ypred must be on same device"); + } + else if(y->getDims()!=ypred->getDims()){ + __throw_invalid_argument("Tensors must be of same shape"); + } + + auto bce = [](ftype y, ftype ypred){ + return 
y*log(std::max(ypred, epsBce)) + (1-y)*log(std::max(1-ypred, epsBce)); + }; + + const auto nBatches = y->getDims()[0]; + + ftype loss = 0; + for(tensorSize_t i=0; i(std::vector{1}, std::vector{-loss / nBatches}, y->getDevice(), true); + res->setCgNode(make_shared(y, ypred)); + assert(res->getRequiresGrad()); + + return res; +} \ No newline at end of file diff --git a/src/backend/training/loss_functions/bce_loss.h b/src/backend/training/loss_functions/bce_loss.h new file mode 100644 index 0000000..9ddc938 --- /dev/null +++ b/src/backend/training/loss_functions/bce_loss.h @@ -0,0 +1,22 @@ +/** + * @file bce_loss.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "loss_base.h" + +namespace train { + class BceLoss final : public LossBase { + public: + std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr ypred) const override; + }; +} diff --git a/src/backend/training/loss_functions/bce_sigmoid_loss.cpp b/src/backend/training/loss_functions/bce_sigmoid_loss.cpp new file mode 100644 index 0000000..2634bf4 --- /dev/null +++ b/src/backend/training/loss_functions/bce_sigmoid_loss.cpp @@ -0,0 +1,53 @@ +/** + * @file bce_logits_loss.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + + #include "bce_sigmoid_loss.h" + + #include "computational_graph/loss_functions/bce_sigmoid_node.h" + + #include + +using namespace std; +using namespace train; + +/** + * @brief Expected shapes: (batchsize) or (batchsize, 1) + * @return Tensor of shape (1) + */ +shared_ptr BceSigmoidLoss::operator()(const shared_ptr y, const shared_ptr logits) const { + if(!logits->getRequiresGrad()) { + __throw_invalid_argument("logits must have gradient enabled"); + } + else if(y->getDevice() != logits->getDevice()){ + __throw_invalid_argument("y 
and logits must be on same device"); + } + else if(y->getDims()!=logits->getDims()){ + __throw_invalid_argument("Tensors must be of same shape"); + } + + auto bceSimplified = [](ftype y, ftype logit){ + constexpr ftype zero = 0; + return std::max(logit, zero) - logit*y + log(1+exp(-std::abs(logit))); + }; + + const auto nBatches = y->getDims()[0]; + + ftype loss = 0; + for(tensorSize_t i=0; i(std::vector{1}, std::vector{loss / nBatches}, y->getDevice(), true); + res->setCgNode(make_shared(y, logits)); + assert(res->getRequiresGrad()); + + return res; +} \ No newline at end of file diff --git a/src/backend/training/loss_functions/bce_sigmoid_loss.h b/src/backend/training/loss_functions/bce_sigmoid_loss.h new file mode 100644 index 0000000..7aae6bc --- /dev/null +++ b/src/backend/training/loss_functions/bce_sigmoid_loss.h @@ -0,0 +1,22 @@ +/** + * @file bce_loss.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "loss_base.h" + +namespace train { + class BceSigmoidLoss final : public LossBase { + public: + std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr logits) const override; + }; +} diff --git a/src/backend/training/loss_functions/crossentropy_loss.cpp b/src/backend/training/loss_functions/crossentropy_loss.cpp new file mode 100644 index 0000000..d1a5291 --- /dev/null +++ b/src/backend/training/loss_functions/crossentropy_loss.cpp @@ -0,0 +1,55 @@ +/** + * @file crossentropy_loss.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "crossentropy_loss.h" + +#include "computational_graph/loss_functions/crossentropy_node.h" + +#include + +using namespace std; +using namespace train; + +/** + * @brief Expected shapes: (batch_size, n_classes) + * @return Tensor of shape (1) + */ +shared_ptr 
CrossEntropyLoss::operator()(const shared_ptr y, const shared_ptr ypred) const { + if(!ypred->getRequiresGrad()) { + __throw_invalid_argument("ypred must have gradient enabled"); + } + else if(y->getDevice() != ypred->getDevice()){ + __throw_invalid_argument("y and ypred must be on same device"); + } + else if(y->getDims()!=ypred->getDims()){ + __throw_invalid_argument("Tensors must be of same shape"); + } + + auto ce = [&y, &ypred](const tensorDim_t b){ + ftype res = 0; + for(tensorDim_t i=0; igetDims()[-1]; i++){ + res += y->get(b, i) * log(std::max(ypred->get(b, i), epsCrossentropy)); + } + return res; + }; + + const auto nBatches = y->getDims()[0]; + ftype loss = 0; + for(tensorSize_t b=0; b(std::vector{1}, std::vector{-loss / nBatches}, y->getDevice(), true); + res->setCgNode(std::make_shared(y, ypred)); + assert(res->getRequiresGrad()); + + return res; +} \ No newline at end of file diff --git a/src/backend/training/loss_functions/crossentropy_loss.h b/src/backend/training/loss_functions/crossentropy_loss.h new file mode 100644 index 0000000..dfd71b3 --- /dev/null +++ b/src/backend/training/loss_functions/crossentropy_loss.h @@ -0,0 +1,22 @@ +/** + * @file crossentropy_loss.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "loss_base.h" + +namespace train { + class CrossEntropyLoss final : public LossBase { + public: + std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr ypred) const override; + }; +} diff --git a/src/backend/training/loss_functions/crossentropy_softmax_loss.cpp b/src/backend/training/loss_functions/crossentropy_softmax_loss.cpp new file mode 100644 index 0000000..a2b7866 --- /dev/null +++ b/src/backend/training/loss_functions/crossentropy_softmax_loss.cpp @@ -0,0 +1,93 @@ +/** + * @file crossentropy_logits_loss.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelflogits->nl) + * 
@brief + * @version 0.1 + * @date 2026-03-17 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "crossentropy_softmax_loss.h" + +#include "computational_graph/loss_functions/crossentropy_softmax_node.h" + +#include + +using namespace std; +using namespace train; + +/** + * @brief Expected shapes: (batch_size, n_classes) + * @return Tensor of shape (1) + */ +shared_ptr CrossEntropySoftmaxLoss::operator()(const shared_ptr y, const shared_ptr logits) const { + if(!logits->getRequiresGrad()) { + __throw_invalid_argument("logits must have gradient enabled"); + } + else if(y->getDevice() != logits->getDevice()){ + __throw_invalid_argument("y and logits must be on same device"); + } + else if(y->getDims()!=logits->getDims()){ + __throw_invalid_argument("Tensors must be of same shape"); + } + + //////////////////////////////////////////////// + + const auto nRows = logits->getDims()[-2]; + const auto nCols = logits->getDims()[-1]; + + // pre-compute exponents and max-values + vector maxValues(nRows); + Tensor tmp(logits->getDims(), logits->getDevice(), false); + for(tensorDim_t i=0; i::infinity(); + for(tensorDim_t j=0; jget(i, j)); + } + + maxValues[i] = maxV; + + for(tensorDim_t j=0; jget(i, j)-maxV; + tmp.set(exp(e), i, j); + } + } + + const tensorSize_t stride = logits->getDims()[-1]; + ftype loss = 0; + + /** + * CE = -sum_i(y_i * z_i) + log(sum_j(exp(z_j))) with + * log(sum_j(exp(z_j))) = max(z) + log(sum_j(exp(z_j - max(z)))). 
+ * for numerical stability + */ + auto compute = [&loss, &y, &logits, &tmp, &maxValues, stride](tensorSize_t start){ + ftype lsum = 0; + for(tensorSize_t i=start; i0){ // y either zero or one + loss += -(*logits)[i] + maxValues[j] + lsum; + } + } + }; + + tensorSize_t offset=0; + while(offsetgetSize()) { + compute(offset); + offset += stride; + } + + auto res = make_shared(std::vector{1}, std::vector{loss / logits->getDims()[0]}, y->getDevice(), true); + res->setCgNode(std::make_shared(y, logits)); + assert(res->getRequiresGrad()); + + return res; +} \ No newline at end of file diff --git a/src/backend/training/loss_functions/crossentropy_softmax_loss.h b/src/backend/training/loss_functions/crossentropy_softmax_loss.h new file mode 100644 index 0000000..6feb16a --- /dev/null +++ b/src/backend/training/loss_functions/crossentropy_softmax_loss.h @@ -0,0 +1,22 @@ +/** + * @file crossentropy_loss.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-07 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "loss_base.h" + +namespace train { + class CrossEntropySoftmaxLoss final : public LossBase { + public: + std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr logits) const override; + }; +} diff --git a/src/backend/training/loss_functions/loss_base.h b/src/backend/training/loss_functions/loss_base.h index e6c7922..9da95ea 100644 --- a/src/backend/training/loss_functions/loss_base.h +++ b/src/backend/training/loss_functions/loss_base.h @@ -3,7 +3,7 @@ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) * @brief * @version 0.1 - * @date 2026-02-02 + * @date 2026-03-07 * * @copyright Copyright (c) 2026 * @@ -13,7 +13,22 @@ #include "data_modeling/tensor.h" -class LossBase { - public: - virtual Tensor operator()(Tensor& y, const Tensor& y_target) const noexcept = 0; -}; \ No newline at end of file +#include + +namespace train { + class LossBase { + public: + LossBase() 
= default; + + LossBase(const LossBase& other) = delete; + LossBase& operator=(const LossBase& other) = delete; + + LossBase(LossBase&& other) noexcept = default; + LossBase& operator=(LossBase&& other) noexcept = default; + + ~LossBase() noexcept = default; + + virtual std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr ypred) const = 0; + }; +} diff --git a/src/backend/training/loss_functions/mse_loss.cpp b/src/backend/training/loss_functions/mse_loss.cpp deleted file mode 100644 index 7c7c478..0000000 --- a/src/backend/training/loss_functions/mse_loss.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/** - * @file mse_loss.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-03 - * - * @copyright Copyright (c) 2026 - * - */ - -#include "mse_loss.h" -#include "utility/global_params.h" - -#include - -/** - * @brief Expects shape (b-size, 1), or simply (batch-size) - * - * @param y Predicted output - * @param t_target Target - * @return Tensor of shape (b-size, 1) - */ -Tensor MseLoss::operator()(Tensor& y, const Tensor& y_target) const noexcept { - auto res = Tensor(y); - for(tensorSize_t i = 0; i +#include + +using namespace std; +using namespace train; + +/** + * @brief Expected shapes: (batchsize) or (batchsize, 1) + * @return Tensor of shape (1) + */ +shared_ptr RmseLoss::operator()(const shared_ptr y, const shared_ptr ypred) const { + if(!ypred->getRequiresGrad()) { + __throw_invalid_argument("ypred must have gradient enabled"); + } + else if(y->getDevice() != ypred->getDevice()){ + __throw_invalid_argument("y and ypred must be on same device"); + } + else if(y->getDims()!=ypred->getDims()){ + __throw_invalid_argument("Tensors must be of same shape"); + } + + auto diffPow = [](ftype y, ftype ypred){ + auto diff = y - ypred; + return diff * diff; + }; + + const auto nBatches = y->getDims()[0]; + + ftype loss = 0; + for(tensorSize_t i=0; i(std::vector{1}, std::vector{loss}, y->getDevice(), 
true); + res->setCgNode(make_shared(y, ypred, loss)); + assert(res->getRequiresGrad()); + + return res; +} \ No newline at end of file diff --git a/src/backend/training/loss_functions/rmse_loss.h b/src/backend/training/loss_functions/rmse_loss.h new file mode 100644 index 0000000..804f88d --- /dev/null +++ b/src/backend/training/loss_functions/rmse_loss.h @@ -0,0 +1,22 @@ +/** + * @file rmse_loss.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "loss_base.h" + +namespace train { + class RmseLoss final : public LossBase { + public: + std::shared_ptr operator()(const std::shared_ptr y, + const std::shared_ptr ypred) const override; + }; +} diff --git a/src/backend/training/optimizers/optimizer_base.cpp b/src/backend/training/optimizers/optimizer_base.cpp index 58e35c3..e2a6d8d 100644 --- a/src/backend/training/optimizers/optimizer_base.cpp +++ b/src/backend/training/optimizers/optimizer_base.cpp @@ -1,9 +1,9 @@ /** - * @file optimizer_base.h - * @author your name (you@domain.com) + * @file optimizer_base.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) * @brief * @version 0.1 - * @date 2026-02-02 + * @date 2026-03-14 * * @copyright Copyright (c) 2026 * @@ -11,10 +11,15 @@ #include "optimizer_base.h" -float OptimizerBase::getLr() const noexcept { - return lr; -} +#include "data_modeling/tensor_functions.h" -void OptimizerBase::setLr(const float lr) noexcept { - this->lr = lr; +using namespace train; + +void OptimizerBase::zeroGrad() noexcept{ + for(auto& p: params){ + auto grads = p->getGrads(); + + if(grads) + TensorFunctions::ToZeros(*grads); + } } \ No newline at end of file diff --git a/src/backend/training/optimizers/optimizer_base.h b/src/backend/training/optimizers/optimizer_base.h index 732b219..af4f6ef 100644 --- a/src/backend/training/optimizers/optimizer_base.h +++ 
b/src/backend/training/optimizers/optimizer_base.h @@ -3,7 +3,7 @@ * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) * @brief * @version 0.1 - * @date 2026-02-02 + * @date 2026-03-07 * * @copyright Copyright (c) 2026 * @@ -12,13 +12,38 @@ #pragma once #include "data_modeling/tensor.h" +#include "training/loss_functions/loss_base.h" -class OptimizerBase { - private: - float lr = 0.05; +#include +#include + +namespace train { + class OptimizerBase { + protected: + const ftype lr; + const std::vector< std::shared_ptr > params; public: - virtual Tensor operator()(Tensor& t) const noexcept; - float getLr() const noexcept; - void setLr(const float lr) noexcept; -}; \ No newline at end of file + OptimizerBase(std::vector< std::shared_ptr > params, ftype lr) + : params{std::move(params)}, lr{lr} + { +#ifndef NDEBUG + for(const auto& param: params){ + assert(param); // we don't want nullptrs here + } +#endif // NDEBUG + }; + + ~OptimizerBase() noexcept = default; + + OptimizerBase(const OptimizerBase& other) = delete; + OptimizerBase& operator=(const OptimizerBase& other) = delete; + + OptimizerBase(OptimizerBase&& other) noexcept = default; + OptimizerBase& operator=(OptimizerBase&& other) noexcept = default; + + virtual void step() = 0; + + void zeroGrad() noexcept; + }; +} \ No newline at end of file diff --git a/src/backend/training/optimizers/rmsprop.cpp b/src/backend/training/optimizers/rmsprop.cpp new file mode 100644 index 0000000..c5a93a8 --- /dev/null +++ b/src/backend/training/optimizers/rmsprop.cpp @@ -0,0 +1,47 @@ +/** + * @file rmsprop.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-10 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "rmsprop.h" + +using namespace std; +using namespace train; + +void RmsPropOptimizer::step() { + constexpr ftype eps = 1e-9; + for(const auto& param: params){ + auto tPtr = param.get(); + const auto gPtr = tPtr->getGrads().get(); + auto vPtr = 
movingAvg[tPtr].get(); + + // update moving avg + if(vPtr!=nullptr) { // hot path + for(tensorSize_t i=0; igetSize(); i++){ + auto g = (*gPtr)[i]; + auto update = decay * (*vPtr)[i] + (1-decay)*g*g; + vPtr->set(update, i); + } + } + else { // init loop + movingAvg[tPtr] = make_unique(tPtr->getDims(), tPtr->getDevice(), false); // create empty tensor + vPtr = movingAvg[tPtr].get(); + for(tensorSize_t i=0; igetSize(); i++) { + auto g = (*gPtr)[i]; + vPtr->set((1-decay)*g*g, i); + } + } + + // update weights: RMSProp divides by the root of the moving average + for(tensorSize_t i=0; igetSize(); i++) { + auto update = (*tPtr)[i] - lr * (*gPtr)[i] / (std::sqrt((*vPtr)[i]) + eps); + tPtr->set(update, i); + } + } +} \ No newline at end of file diff --git a/src/backend/training/optimizers/rmsprop.h b/src/backend/training/optimizers/rmsprop.h new file mode 100644 index 0000000..967c18e --- /dev/null +++ b/src/backend/training/optimizers/rmsprop.h @@ -0,0 +1,34 @@ +/** + * @file rmsprop.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-10 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "optimizer_base.h" +#include "utility/global_params.h" + +#include + +namespace train { + class RmsPropOptimizer final : public OptimizerBase { + private: + const ftype decay; + std::unordered_map> movingAvg; + + public: + RmsPropOptimizer(std::vector< std::shared_ptr > params, ftype lr, ftype decay) + : OptimizerBase(std::move(params), lr), decay{decay} + { + for(const auto& param: this->params) { // NB: ctor argument was moved into the base; iterate the member + movingAvg[param.get()] = nullptr; // lazy initialization + } + } + + void step() override; + }; +} \ No newline at end of file diff --git a/src/backend/training/optimizers/sgd.cpp b/src/backend/training/optimizers/sgd.cpp new file mode 100644 index 0000000..83f122d --- /dev/null +++ b/src/backend/training/optimizers/sgd.cpp @@ -0,0 +1,25 @@ +/** + * @file sgd.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-08 + * + * @copyright 
Copyright (c) 2026 + * + */ + +#include "sgd.h" + +using namespace std; +using namespace train; + +void SgdOptimizer::step() { + for(auto& t: params){ + auto grads = t->getGrads(); + for(auto idx=0; idxgetSize(); idx++){ + auto updatedWeight = (*t)[idx] - lr*(*grads)[idx]; + t->set(updatedWeight, idx); + } + } +} \ No newline at end of file diff --git a/src/backend/training/optimizers/sgd.h b/src/backend/training/optimizers/sgd.h new file mode 100644 index 0000000..0d8a891 --- /dev/null +++ b/src/backend/training/optimizers/sgd.h @@ -0,0 +1,24 @@ +/** + * @file sgd.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-08 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "optimizer_base.h" + +#include "utility/global_params.h" + +namespace train { + class SgdOptimizer final : public OptimizerBase { + public: + SgdOptimizer(std::vector< std::shared_ptr > params, ftype lr) + : OptimizerBase(std::move(params), lr) { } + + void step() override; + }; +} \ No newline at end of file diff --git a/src/backend/training/trainers/base_train_loop.cpp b/src/backend/training/trainers/base_train_loop.cpp new file mode 100644 index 0000000..ff95688 --- /dev/null +++ b/src/backend/training/trainers/base_train_loop.cpp @@ -0,0 +1,63 @@ +/** + * @file base_train_loop.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-11 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "base_train_loop.h" + +#include + +#include +#include +#include + +#include + +using namespace std; +using namespace train; + +void BaseTrainLoop::run(shared_ptr& x, shared_ptr& y, const bool shuffle, const bool verbose) { + const auto nSamples = x->getDims()[0]; + + for(size_t e=0; e indices(nSamples); + std::iota(indices.begin(), indices.end(), 0); + + if(verbose) + cout << "\nEpoch " << e; + + if(shuffle){ + std::random_device rd; + std::mt19937 rng(rd()); + std::shuffle(indices.begin(), 
indices.end(), rng); + } + + tensorDim_t low = 0; + + int batch = 0; + while(low < nSamples){ + if(verbose) + cout << "\nBatch " << batch << endl; + + std::span batchSpan(indices.data() + low, low+bsize < nSamples ? bsize : nSamples-low); + + auto xBatch = make_shared(x->getSlice(batchSpan)); + auto yBatch = make_shared(y->getSlice(batchSpan)); + + auto yPred = (*graph)(xBatch); + auto l = (*loss)(yBatch, yPred); + + l->backward(); + optim->step(); + optim->zeroGrad(); + + low += bsize; + } + } +} \ No newline at end of file diff --git a/src/backend/training/trainers/base_train_loop.h b/src/backend/training/trainers/base_train_loop.h new file mode 100644 index 0000000..3beeb46 --- /dev/null +++ b/src/backend/training/trainers/base_train_loop.h @@ -0,0 +1,49 @@ +/** + * @file base_train_loop.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-13 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "data_modeling/tensor.h" +#include "module/module_base.h" + +#include "training/optimizers/optimizer_base.h" +#include "training/loss_functions/loss_base.h" + +#include +#include + +namespace train { + class BaseTrainLoop { + protected: + const size_t epochs; + const tensorDim_t bsize; + + std::shared_ptr loss; + std::shared_ptr optim; + std::shared_ptr graph; + + public: + BaseTrainLoop(std::shared_ptr graph, std::shared_ptr loss, + std::shared_ptr optim, size_t epochs, tensorDim_t bsize) + : graph{std::move(graph)}, optim{std::move(optim)}, loss{loss}, epochs{epochs}, bsize{bsize} + { } + + ~BaseTrainLoop() noexcept = default; + + BaseTrainLoop(const BaseTrainLoop& other) = delete; + BaseTrainLoop& operator=(const BaseTrainLoop& other) = delete; + + BaseTrainLoop(BaseTrainLoop&& other) noexcept = default; + BaseTrainLoop& operator=(BaseTrainLoop&& other) noexcept = default; + + void run(std::shared_ptr& x, std::shared_ptr& y, bool shuffle, bool verbose=true); + }; +} \ No newline at end of file 
diff --git a/src/backend/utility/global_params.h b/src/backend/utility/global_params.h index 4e91fb2..3d6edcb 100644 --- a/src/backend/utility/global_params.h +++ b/src/backend/utility/global_params.h @@ -13,7 +13,7 @@ #include -using ftype = float; // TODO: make compiler flag +using ftype = float; // TODO: make compiler flag? /** * IMPORTANT: For the following block we assume that @@ -27,4 +27,13 @@ using ftype = float; // TODO: make compiler flag * request fit into datatype tensorDim_t. */ using tensorDim_t = std::uint16_t; -using tensorSize_t = std::uint32_t; \ No newline at end of file +using tensorSize_t = std::uint32_t; + +// we assert this here so during conversions of tensorDim_t to +// tensorSize_t we do not need to cast explicitly +static_assert(sizeof(tensorDim_t)<=sizeof(tensorSize_t)); + +// ----------------- Numerical stability ------------------- + +constexpr ftype epsCrossentropy = 1e-5; +constexpr ftype epsBce = 1e-5; \ No newline at end of file diff --git a/src/backend/utility/initializers.cpp b/src/backend/utility/initializers.cpp index 4240345..0fad81c 100644 --- a/src/backend/utility/initializers.cpp +++ b/src/backend/utility/initializers.cpp @@ -11,36 +11,27 @@ #include "initializers.h" -#include -#include +#include using namespace std; using namespace utility; -namespace { - class GaussianInitializer final : public InitializerBase { - public: - GaussianInitializer(); - ftype drawNumber() const override; - }; +ftype GaussianInitializer::drawNumber() const { + return dist(gen); +} - GaussianInitializer::GaussianInitializer() : InitializerBase() {} +ftype UniformXavierInitializer::computeRange(ftype nInputs, ftype nOutputs) { + return sqrt(6/(nInputs + nOutputs)); +} - ftype GaussianInitializer::drawNumber() const { - static std::random_device rd; - static std::mt19937 gen{rd()}; - static std::normal_distribution dist; +ftype UniformXavierInitializer::drawNumber() const { + return dist(gen); +} - return dist(gen); - } +ftype 
NormalXavierInitializer::computeSigma(ftype nInputs, ftype nOutputs) { + return sqrt(2/(nInputs + nOutputs)); } -unique_ptr InitializerFactory::getInitializer(InitClass ic) { - switch(ic){ - case InitClass::Gaussian: - return make_unique(); - default: - __throw_invalid_argument("Init class not implemented yet"); - } - return nullptr; // never reached, suppress warning +ftype NormalXavierInitializer::drawNumber() const { + return dist(gen); } \ No newline at end of file diff --git a/src/backend/utility/initializers.h b/src/backend/utility/initializers.h index ba76707..85eb9c5 100644 --- a/src/backend/utility/initializers.h +++ b/src/backend/utility/initializers.h @@ -13,24 +13,98 @@ #include "global_params.h" +#include +#include + #include -#include - -namespace utility{ - enum class InitClass { - Gaussian - }; - - class InitializerBase { - public: - InitializerBase() = default; - virtual ~InitializerBase() = default; - virtual ftype drawNumber() const = 0; - }; - - class InitializerFactory final { - public: - InitializerFactory() = delete; - static std::unique_ptr getInitializer(InitClass ic); - }; +#include + +namespace utility { + class InitializerBase { + protected: + static inline std::optional randomSeed_opt = std::nullopt; + + public: + InitializerBase() = default; + + virtual ~InitializerBase() = default; + virtual ftype drawNumber() const = 0; + + static void setSeed(unsigned int s) noexcept { randomSeed_opt = s; } + }; + + class GaussianInitializer final : public InitializerBase { + private: + std::random_device rd{}; + mutable std::mt19937 gen; + mutable std::normal_distribution dist; + + public: + GaussianInitializer(ftype stddev) : gen{rd()}, dist{0, stddev} + { + if(randomSeed_opt){ + gen = std::mt19937{randomSeed_opt.value()}; + } + } + + GaussianInitializer(ftype stddev, unsigned int seed) + : dist{0, stddev} + { + gen = std::mt19937{seed}; + } + + ftype drawNumber() const override; + }; + + class UniformXavierInitializer final : public InitializerBase 
{ + private: + std::random_device rd{}; + mutable std::mt19937 gen; + mutable std::uniform_real_distribution dist; + + ftype computeRange(ftype nInputs, ftype nOutputs); + + public: + UniformXavierInitializer(tensorDim_t nInputs, tensorDim_t nOutputs) + : gen{rd()}, dist{-computeRange(nInputs, nOutputs), computeRange(nInputs, nOutputs)} + { + if(randomSeed_opt){ + gen = std::mt19937{randomSeed_opt.value()}; + } + } + + UniformXavierInitializer(tensorDim_t nInputs, tensorDim_t nOutputs, unsigned int seed) + : dist{-computeRange(nInputs, nOutputs), computeRange(nInputs, nOutputs)} + { + gen = std::mt19937{seed}; + } + + ftype drawNumber() const override; + }; + + class NormalXavierInitializer final : public InitializerBase { + private: + std::random_device rd{}; + mutable std::mt19937 gen; + mutable std::normal_distribution dist; + + ftype computeSigma(ftype nInputs, ftype nOutputs); + + public: + NormalXavierInitializer(tensorDim_t nInputs, tensorDim_t nOutputs) + : gen{rd()}, dist{0, computeSigma(nInputs, nOutputs)} + { + if(randomSeed_opt){ + gen = std::mt19937{randomSeed_opt.value()}; + } + } + + NormalXavierInitializer(tensorDim_t nInputs, tensorDim_t nOutputs, unsigned int seed) + : dist{0, computeSigma(nInputs, nOutputs)} + { + gen = std::mt19937{seed}; + } + + ftype drawNumber() const override; + }; } diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 6c5370d..405d1f3 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -1,16 +1,41 @@ include_directories( - "${CMAKE_CURRENT_SOURCE_DIR}/python_utility" + "${CMAKE_CURRENT_SOURCE_DIR}/py_utility" ) # remove the lib... 
prefix set(CMAKE_SHARED_MODULE_PREFIX "") add_library(_core MODULE - data_modeling/py_data_modeling.cpp - data_modeling/py_data_modeling_util.cpp + py_core/py_core.cpp + py_core/py_core_util.cpp ) - -set_target_properties(_core PROPERTIES + +add_library(_nn MODULE + py_nn/py_nn.cpp + ) + +add_library(_sys MODULE + py_sys/py_sys.cpp + ) + +add_library(_train MODULE + py_train/py_train.cpp + ) + +set_target_properties(_core _nn _sys _train PROPERTIES PREFIX "" - OUTPUT_NAME "_core" - LIBRARY_OUTPUT_DIRECTORY ${PYTHON_MODULE_DIR}) \ No newline at end of file + INSTALL_RPATH "$ORIGIN" # to find shared backend-core lib + BUILD_WITH_INSTALL_RPATH TRUE # use install RPATH even during build + LIBRARY_OUTPUT_DIRECTORY "${PYTHON_MODULE_DIR}") + +set_target_properties(_core PROPERTIES + OUTPUT_NAME "_core") + +set_target_properties(_nn PROPERTIES + OUTPUT_NAME "_nn") + +set_target_properties(_sys PROPERTIES + OUTPUT_NAME "_sys") + +set_target_properties(_train PROPERTIES + OUTPUT_NAME "_train") \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling.cpp b/src/python/data_modeling/py_data_modeling.cpp deleted file mode 100644 index 88b4bec..0000000 --- a/src/python/data_modeling/py_data_modeling.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/** - * @file py_data_modeling.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2026-02-21 - * - * @copyright Copyright (c) 2026 - * - */ - -#include "data_modeling/tensor.h" - -#include "py_data_modeling_util.h" -#include "python_templates.h" -#include "custom_converters.h" - -#include "data_modeling/tensor.h" -#include "data_modeling/tensor_functions.h" -#include "computational_graph/graph_creation.h" - -#include -#include -#include - -BOOST_PYTHON_MODULE(_core) -{ - using namespace boost::python; - - // some macros to make code below easier to read - #define WRAP_TENSOR_METHOD_1(method) \ - +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ - return 
std::make_shared(self.method(other)); \ - } - - #define WRAP_SCALAR(method, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return std::make_shared(self.method(val)); \ - } - - #define WRAP_SCALAR_REVERSE(op, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return std::make_shared(val op self); \ - } - - // different, since those are not methods anymore - #define WRAP_FREE_MEMBER_FUNC_1(fPtr, T1, T2) \ - +[](const Tensor& self, int v1, int v2) -> std::shared_ptr { \ - return std::make_shared((self.*fPtr)(v1, v2)); \ - } - - #define WRAP_FREE_MEMBER_FUNC_2(fPtr, T1, T2, T3) \ - +[](const Tensor& self, T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ - return std::make_shared((self.*fPtr)(v1, v2, v3)); \ - } - - #define WRAP_FREE_FUNC_1(fPtr, T1) \ - +[](T1 v1) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1)); \ - } - - #define WRAP_FREE_FUNC_2(fPtr, T1, T2) \ - +[](T1 v1, T2 v2) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1, v2)); \ - } - - #define WRAP_FREE_FUNC_3(fPtr, T1, T2, T3) \ - +[](T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ - return std::make_shared((*fPtr)(v1, v2, v3)); \ - } - - #define WRAP_FREE_FUNC_4(fPtr, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return (*fPtr)(self.getSharedPtr(), val); \ - } - - #define WRAP_FREE_FUNC_5(fPtr) \ - +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ - return (*fPtr)(self.getSharedPtr(), other.getSharedPtr()); \ - } - - #define WRAP_FREE_FUNC_6(fPtr, T) \ - +[](const Tensor& self, T val) -> std::shared_ptr { \ - return (*fPtr)(val, self.getSharedPtr()); \ - } - - #define WRAP_FREE_FUNC_7(fPtr) \ - +[](const Tensor& self) -> std::shared_ptr { \ - return (*fPtr)(self.getSharedPtr()); \ - } - - #define WRAP_FUNC_AND_CONVERT_DTYPE_1(method) \ - +[](const Tensor& self, int v1) -> ftype { \ - return self.method(static_cast(v1)); \ - } - - #define WRAP_FUNC_AND_CONVERT_DTYPE_2(method) \ - +[](const Tensor& self, int v1, int v2) -> ftype { 
\ - return self.method(static_cast(v1), static_cast(v2)); \ - } - - #define WRAP_FUNC_AND_CONVERT_DTYPE_3(method) \ - +[](const Tensor& self, int v1, int v2, int v3) -> ftype { \ - return self.method(static_cast(v1), static_cast(v2), \ - static_cast(v3)); \ - } - - #define WRAP_FUNC_AND_CONVERT_DTYPE_4(method) \ - +[](const Tensor& self, int v1, int v2, int v3, int v4) -> ftype { \ - return self.method(static_cast(v1), static_cast(v2), \ - static_cast(v3), static_cast(v4)); \ - } - - // classes - class_("Dimension", no_init) - .add_property("list", &Dimension::getItem) - .def("__str__", &Py_Util::toString) - .def("__eq__", Py_DataModeling::dimEquals1) - .def("__eq__", Py_DataModeling::dimEquals2) - .def("__ne__", Py_DataModeling::nDimEquals1) - .def("__ne__", Py_DataModeling::nDimEquals2) - ; - - enum_("Device") - .value("CPU", Device::CPU) - .value("CUDA", Device::CUDA) - ; - - // register implicit dtype conversion - custom_converters::PyListToVectorConverter(); - custom_converters::PyListToVectorConverter(); - - // to convert std::shared_ptr to std::shared_ptr> in Python - boost::python::register_ptr_to_python< std::shared_ptr >(); - - // we manage via shared_ptr, since we deleted copy-ctor - class_, boost::noncopyable>("Tensor", no_init) - .def(init&, optional >()) - .def(init&, Device, optional >()) - .def(init&, const std::vector&, optional >()) - .def(init&, const std::vector&, Device, optional >()) - - // static creation methods - .def("ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)) - .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)) - .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)) - .def("ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)).staticmethod("ones") - - .def("zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)) - .def("zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)) - .def("zeros", 
WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)) - .def("zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)).staticmethod("zeros") - - .def("gauss", WRAP_FREE_FUNC_1(Py_DataModeling::Gaussian0, std::vector)) - .def("gauss", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian1, std::vector, Device)) - .def("gauss", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian2, std::vector, const bool)) - .def("gauss", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian3, std::vector, Device, const bool)).staticmethod("gauss") - - // properties - .add_property("device", &Tensor::getDevice, &Tensor::setDevice) - .add_property("dims", make_function(&Tensor::getDims, return_internal_reference<>())) - .add_property("grads", make_function(&Tensor::getGrads)) - .add_property("requiresGrad", &Tensor::getRequiresGrad, &Tensor::setRequiresGrad) - - // operators - .def("__str__", &Py_Util::toString) - .def("__repr__", &Py_Util::toString) - .def("__len__", &Tensor::getSize) - .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor1, tensorSize_t)) - .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor2, std::vector)) - .def("__setitem__", &Py_DataModeling::tensorSetItem) - - // arithmetics - .def("__matmul__", WRAP_FREE_FUNC_5(Py_DataModeling::matmul)) - .def("__add__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwiseadd)) // elementwise add - .def("__add__", WRAP_FREE_FUNC_4(Py_DataModeling::scalaradd, ftype)) - .def("__radd__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalaradd, ftype)) - - .def("__mul__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwisemul)) // elementwise mult - .def("__mul__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarmul, ftype)) - .def("__rmul__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalarmul, ftype)) - - .def("__sub__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarsub, ftype)) - .def("__truediv__", WRAP_FREE_FUNC_4(Py_DataModeling::scalardiv, ftype)) - - // member functions - .def("getitem", 
WRAP_FUNC_AND_CONVERT_DTYPE_1(Tensor::getItem)) - .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_2(Tensor::getItem)) - .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_3(Tensor::getItem)) - .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_4(Tensor::getItem)) - .def("getitem", Py_DataModeling::getItemVector) // the vector arg - - .def("sum", WRAP_FREE_FUNC_7(&(graph::sumTensor))) - - .def("reset", Py_DataModeling::reset1) - .def("reset", Py_DataModeling::reset2) - - .def("transpose", WRAP_FREE_MEMBER_FUNC_1(Py_DataModeling::transpose1, int, int)) - .def("transpose", WRAP_FREE_MEMBER_FUNC_2(Py_DataModeling::transpose2, int, int, bool)) - .def("transposeThis", Py_DataModeling::transposeThis1) - .def("transposeThis", Py_DataModeling::transposeThis2) - - .def("backward", &Tensor::backward) - ; - - // functions - def("Ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)); - def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)); - def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)); - def("Ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)); - - def("Zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)); - def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)); - def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)); - def("Zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)); - - def("Gaussian", WRAP_FREE_FUNC_1(Py_DataModeling::Gaussian0, std::vector)); - def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian1, std::vector, Device)); - def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian2, std::vector, const bool)); - def("Gaussian", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian3, std::vector, Device, const bool)); -} \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling_util.h b/src/python/data_modeling/py_data_modeling_util.h deleted file mode 100644 index 82a8343..0000000 
--- a/src/python/data_modeling/py_data_modeling_util.h +++ /dev/null @@ -1,135 +0,0 @@ -/** - * @file util.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief Helper and wrapper functions - * @version 0.1 - * @date 2026-02-21 - * - * @copyright Copyright (c) 2026 - * - */ - -#pragma once - -#include "data_modeling/dim_type.h" - -#include "data_modeling/tensor.h" -#include "data_modeling/tensor_functions.h" -#include "computational_graph/graph_creation.h" - -#include -#include - -#include - -namespace Py_DataModeling { - - /********************************************************************************************************* - ********************************************** Dimension ************************************************* - *********************************************************************************************************/ - - inline bool (Dimension::*dimEquals1)(const Dimension&) const = &Dimension::operator==; - inline bool (Dimension::*dimEquals2)(const std::vector&) const = &Dimension::operator==; - - inline bool (Dimension::*nDimEquals1)(const Dimension&) const = &Dimension::operator!=; - inline bool (Dimension::*nDimEquals2)(const std::vector&) const = &Dimension::operator!=; - /********************************************************************************************************* - *********************************************** Tensor *************************************************** - *********************************************************************************************************/ - - ftype tensorGetItem(const Tensor& self, boost::python::object index); - void tensorSetItem(Tensor& self, boost::python::object index, ftype value); - - // need wrappers for default arguments, see - // https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/functions.html - inline auto OnesWrapper0(std::vector dims) { - return TensorFunctions::Ones(std::move(dims)); - } - - inline auto 
OnesWrapper1(std::vector dims, Device d) { - return TensorFunctions::Ones(std::move(dims), d); - } - - inline auto ZerosWrapper0(std::vector dims) { - return TensorFunctions::Zeros(std::move(dims)); - } - - inline auto ZerosWrapper1(std::vector dims, Device d) { - return TensorFunctions::Zeros(std::move(dims), d); - } - - inline auto GaussianWrapper0(std::vector dims) { - return TensorFunctions::Gaussian(std::move(dims)); - } - - inline auto GaussianWrapper1(std::vector dims, Device d) { - return TensorFunctions::Gaussian(std::move(dims), d); - } - - inline Tensor (*Ones0)(std::vector) = &OnesWrapper0; - inline Tensor (*Ones1)(std::vector, Device) = &OnesWrapper1; - inline Tensor (*Ones2)(std::vector, const bool) = &(TensorFunctions::Ones); - inline Tensor (*Ones3)(std::vector, Device, const bool) = &(TensorFunctions::Ones); - - inline Tensor (*Zeros0)(std::vector) = &ZerosWrapper0; - inline Tensor (*Zeros1)(std::vector, Device) = &ZerosWrapper1; - inline Tensor (*Zeros2)(std::vector, const bool) = &(TensorFunctions::Zeros); - inline Tensor (*Zeros3)(std::vector, Device, const bool) = &(TensorFunctions::Zeros); - - inline Tensor (*Gaussian0)(std::vector) = &GaussianWrapper0; - inline Tensor (*Gaussian1)(std::vector, Device) = &GaussianWrapper1; - inline Tensor (*Gaussian2)(std::vector, const bool) = &(TensorFunctions::Gaussian); - inline Tensor (*Gaussian3)(std::vector, Device, const bool) = &(TensorFunctions::Gaussian); - - inline void (Tensor::*reset1)(const ftype) = &Tensor::reset; - inline void (Tensor::*reset2)(const utility::InitClass) = &Tensor::reset; - - inline void (Tensor::*transposeThis1)() = &Tensor::transposeThis; - inline void (Tensor::*transposeThis2)(int, int) = &Tensor::transposeThis; - inline Tensor (Tensor::*transpose1)(int, int) const = &Tensor::transpose; - inline Tensor (Tensor::*transpose2)(int, int, bool) const = &Tensor::transpose; - - inline ftype (Tensor::*getItemVector)(const std::vector&) const = &Tensor::getItem; - - 
/********************************************************************************************************* - ***************************************** Graph creation ************************************************* - *********************************************************************************************************/ - - // multiplications - inline std::shared_ptr (*elementwisemul) - (const std::shared_ptr left, const std::shared_ptr right) = &(graph::mul); - - inline std::shared_ptr (*scalarmul) - (const std::shared_ptr, ftype) = &(graph::mul); - - inline std::shared_ptr (*rscalarmul) - (ftype, const std::shared_ptr) = &(graph::mul); - - // additions - inline std::shared_ptr (*elementwiseadd) - (const std::shared_ptr left, const std::shared_ptr right) = &(graph::add); - - inline std::shared_ptr (*scalaradd) - (const std::shared_ptr, ftype) = &(graph::add); - - inline std::shared_ptr (*rscalaradd) - (ftype, const std::shared_ptr) = &(graph::add); - - // matmul - inline std::shared_ptr (*matmul) - (const std::shared_ptr left, const std::shared_ptr right) = &(graph::matmul); - - // sub, div - inline std::shared_ptr (*scalarsub) - (const std::shared_ptr, ftype) = &(graph::sub); - - inline std::shared_ptr (*scalardiv) - (const std::shared_ptr, ftype) = &(graph::div); - - // get - inline std::shared_ptr (*getItemAsTensor1) - (const std::shared_ptr& t, tensorSize_t idx) = &(graph::get); - - inline std::shared_ptr (*getItemAsTensor2) - (const std::shared_ptr& t, const std::vector& idx) = &(graph::get); -} \ No newline at end of file diff --git a/src/python/layers/py_layers.cpp b/src/python/layers/py_layers.cpp deleted file mode 100644 index 5fc3613..0000000 --- a/src/python/layers/py_layers.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/** - * @file layers.cpp - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-11-17 - * - * @copyright Copyright (c) 2025 - * - */ - -#include "py_layers.h" - -#include - -using namespace 
boost::python; - -ftype Py_Layers::layerGetItem(const layers::LayerBase& self, boost::python::object index) { - extract int_extractor(index); - - // Single integer index (1D) - if(int_extractor.check()) { - int i0 = int_extractor(); - return self.getItem(i0); - } - - // Tuple index (2D, 3D, or 4D) - extract tuple_extractor(index); - if(tuple_extractor.check()) { - tuple idx_tuple = tuple_extractor(); - int ndim = boost::python::len(idx_tuple); - - if (ndim == 2) { - int i0 = extract(idx_tuple[0]); - int i1 = extract(idx_tuple[1]); - return self.getItem(i0, i1); - } - else if (ndim == 3) { - int i0 = extract(idx_tuple[0]); - int i1 = extract(idx_tuple[1]); - int i2 = extract(idx_tuple[2]); - return self.getItem(i0, i1, i2); - } - else if (ndim == 4) { - int i0 = extract(idx_tuple[0]); - int i1 = extract(idx_tuple[1]); - int i2 = extract(idx_tuple[2]); - int i3 = extract(idx_tuple[3]); - return self.getItem(i0, i1, i2, i3); - } - else { - PyErr_SetString(PyExc_IndexError, "Unsupported number of dimensions"); - throw_error_already_set(); - } - } - - PyErr_SetString(PyExc_TypeError, "Index must be an integer or tuple"); - throw_error_already_set(); - return 0.0; // Never reached -} - -void Py_Layers::layerSetItem(layers::LayerBase& self, boost::python::object index, ftype value) { - extract int_extractor(index); - - // Single integer index (1D) - if(int_extractor.check()) { - int i0 = int_extractor(); - self.setItem(value, i0);\ - return; - } - - // Tuple index (2D, 3D, or 4D) - extract tuple_extractor(index); - if(tuple_extractor.check()) { - tuple idx_tuple = tuple_extractor(); - int ndim = boost::python::len(idx_tuple); - - if (ndim == 2) { - int i0 = extract(idx_tuple[0]); - int i1 = extract(idx_tuple[1]); - self.setItem(value, i0, i1); - } - else if (ndim == 3) { - int i0 = extract(idx_tuple[0]); - int i1 = extract(idx_tuple[1]); - int i2 = extract(idx_tuple[2]); - self.setItem(value, i0, i1, i2); - } - else if (ndim == 4) { - int i0 = extract(idx_tuple[0]); - int 
i1 = extract(idx_tuple[1]); - int i2 = extract(idx_tuple[2]); - int i3 = extract(idx_tuple[3]); - self.setItem(value, i0, i1, i2, i3); - } - else { - PyErr_SetString(PyExc_IndexError, "Unsupported number of dimensions"); - throw_error_already_set(); - } - return; - } - - PyErr_SetString(PyExc_TypeError, "Index must be an integer or tuple"); - throw_error_already_set(); -} \ No newline at end of file diff --git a/src/python/layers/py_layers.h b/src/python/layers/py_layers.h deleted file mode 100644 index c90929a..0000000 --- a/src/python/layers/py_layers.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - * @file layers.h - * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) - * @brief - * @version 0.1 - * @date 2025-11-17 - * - * @copyright Copyright (c) 2025 - * - */ - -#pragma once - -#include "ff_layer.h" -#include "python_templates.h" - -#include -#include -#include -#include - -namespace Py_Layers { - ftype layerGetItem(const layers::LayerBase& self, boost::python::object index); - void layerSetItem(layers::LayerBase& self, boost::python::object index, ftype value); -} - -BOOST_PYTHON_MODULE(py_layers) -{ - using namespace boost::python; - - /** - * @brief Wrapper class needed for Boost Python to get the virtual function working - * the way it is intended. 
See documentation here: - * https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/exposing.html - * - */ - struct LayerBaseWrap : layers::LayerBase, wrapper { - Tensor forward(const Tensor& input) const { - return this->get_override("forward")(input); - } - - void print(std::ostream& os) const noexcept { - this->get_override("print")(os); - } - }; - - class_("LayerBase", no_init) - .def("forward", pure_virtual(&layers::LayerBase::forward)) - //.def("backward", &FfLayer::backward) - .def("getDims", &layers::LayerBase::getDims, return_internal_reference<>()) - .def("getTensor", &layers::LayerBase::getDims, return_internal_reference<>()) - .def("__getitem__", &Py_Layers::layerGetItem) - .def("__setitem__", &Py_Layers::layerSetItem) - .def("__str__", &toString) - ; - - class_ >("FfLayer", init()) - .def("forward", &layers::FfLayer::forward) - //.def("backward", &FfLayer::backward) - ; -} \ No newline at end of file diff --git a/src/python/networks/py_sequential.cpp b/src/python/networks/py_sequential.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/python/networks/py_sequential.h b/src/python/networks/py_sequential.h deleted file mode 100644 index e69de29..0000000 diff --git a/src/python/py_core/py_core.cpp b/src/python/py_core/py_core.cpp new file mode 100644 index 0000000..846d6e9 --- /dev/null +++ b/src/python/py_core/py_core.cpp @@ -0,0 +1,239 @@ +/** + * @file py_data_modeling.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-02-21 + * + * @copyright Copyright (c) 2026 + * + */ + +#include "data_modeling/tensor.h" + +#include "py_core_util.h" +#include "python_templates.h" +#include "custom_converters.h" + +#include "data_modeling/tensor.h" +#include "data_modeling/tensor_functions.h" +#include "computational_graph/tensor_ops/graph_creation.h" + +#include +#include +#include + +BOOST_PYTHON_MODULE(_core) +{ + using namespace boost::python; + + // some macros to 
make code below easier to read + #define WRAP_TENSOR_METHOD_1(method) \ + +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ + return std::make_shared(self.method(other)); \ + } + + #define WRAP_SCALAR(method, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return std::make_shared(self.method(val)); \ + } + + #define WRAP_SCALAR_REVERSE(op, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return std::make_shared(val op self); \ + } + + // different, since those are not methods anymore + #define WRAP_FREE_MEMBER_FUNC_1(fPtr, T1, T2) \ + +[](const Tensor& self, int v1, int v2) -> std::shared_ptr { \ + return std::make_shared((self.*fPtr)(v1, v2)); \ + } + + #define WRAP_FREE_MEMBER_FUNC_2(fPtr, T1, T2, T3) \ + +[](const Tensor& self, T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ + return std::make_shared((self.*fPtr)(v1, v2, v3)); \ + } + + #define WRAP_FREE_FUNC_1(fPtr, T1) \ + +[](T1 v1) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1)); \ + } + + #define WRAP_FREE_FUNC_2(fPtr, T1, T2) \ + +[](T1 v1, T2 v2) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2)); \ + } + + #define WRAP_FREE_FUNC_3(fPtr, T1, T2, T3) \ + +[](T1 v1, T2 v2, T3 v3) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2, v3)); \ + } + + #define WRAP_FREE_FUNC_4(fPtr, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr(), val); \ + } + + #define WRAP_FREE_FUNC_5(fPtr) \ + +[](const Tensor& self, const Tensor& other) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr(), other.getSharedPtr()); \ + } + + #define WRAP_FREE_FUNC_6(fPtr, T) \ + +[](const Tensor& self, T val) -> std::shared_ptr { \ + return (*fPtr)(val, self.getSharedPtr()); \ + } + + #define WRAP_FREE_FUNC_7(fPtr) \ + +[](const Tensor& self) -> std::shared_ptr { \ + return (*fPtr)(self.getSharedPtr()); \ + } + + #define WRAP_FREE_FUNC_8(fPtr, T1, T2, T3, T4) \ + +[](T1 v1, T2 v2, T3 v3, T4 v4) -> 
std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2, v3, v4)); \ + } + + #define WRAP_FREE_FUNC_9(fPtr, T1, T2, T3, T4, T5) \ + +[](T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) -> std::shared_ptr { \ + return std::make_shared((*fPtr)(v1, v2, v3, v4, v5)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_1(method) \ + +[](const Tensor& self, int v1) -> ftype { \ + return self.method(static_cast(v1)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_2(method) \ + +[](const Tensor& self, int v1, int v2) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_3(method) \ + +[](const Tensor& self, int v1, int v2, int v3) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2), \ + static_cast(v3)); \ + } + + #define WRAP_FUNC_AND_CONVERT_DTYPE_4(method) \ + +[](const Tensor& self, int v1, int v2, int v3, int v4) -> ftype { \ + return self.method(static_cast(v1), static_cast(v2), \ + static_cast(v3), static_cast(v4)); \ + } + + // classes + class_("Dimension", no_init) + .add_property("list", &Dimension::get) + .def("__str__", &Py_Util::toString) + .def("__eq__", Py_DataModeling::dimEquals1) + .def("__eq__", Py_DataModeling::dimEquals2) + .def("__ne__", Py_DataModeling::nDimEquals1) + .def("__ne__", Py_DataModeling::nDimEquals2) + ; + + enum_("Device") + .value("CPU", Device::CPU) + .value("CUDA", Device::CUDA) + ; + + // register implicit dtype conversion + custom_converters::PyListToVectorConverter(); + custom_converters::PyListToVectorConverter(); + + // to convert std::shared_ptr to std::shared_ptr> in Python + boost::python::register_ptr_to_python< std::shared_ptr >(); + + // we manage via shared_ptr, since we deleted copy-ctor + class_, boost::noncopyable>("Tensor", no_init) + .def(init&, optional >()) + .def(init&, Device, optional >()) + .def(init&, const std::vector&, optional >()) + .def(init&, const std::vector&, Device, optional >()) + + // static creation methods + .def("ones", 
WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)) + .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)) + .def("ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)) + .def("ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)) + .staticmethod("ones") + + .def("zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)) + .def("zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)) + .def("zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)) + .def("zeros", WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)) + .staticmethod("zeros") + + .def("gauss", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian0, std::vector, ftype)) + .def("gauss", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian1, std::vector, Device, ftype)) + .def("gauss", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian2, std::vector, ftype, const bool)) + .def("gauss", WRAP_FREE_FUNC_8(Py_DataModeling::Gaussian3, std::vector, Device, ftype, const bool)) + .staticmethod("gauss") + + // properties + .add_property("device", &Tensor::getDevice, &Tensor::setDevice) + .add_property("dims", make_function(&Tensor::getDims, return_internal_reference<>())) + .add_property("grads", make_function(&Tensor::getGrads)) + .add_property("requiresGrad", &Tensor::getRequiresGrad, &Tensor::setRequiresGrad) + + // operators + .def("__str__", &Py_Util::toString) + .def("__repr__", &Py_Util::toString) + .def("__len__", &Tensor::getSize) + .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor1, tensorSize_t)) + .def("__getitem__", WRAP_FREE_FUNC_4(&Py_DataModeling::getItemAsTensor2, std::vector)) + .def("__setitem__", &Py_DataModeling::tensorSetItem) + + // arithmetics + .def("__matmul__", WRAP_FREE_FUNC_5(Py_DataModeling::matmul)) + .def("__add__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwiseadd)) // elementwise add + .def("__add__", WRAP_FREE_FUNC_4(Py_DataModeling::scalaradd, ftype)) 
+ .def("__radd__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalaradd, ftype)) + + .def("__mul__", WRAP_FREE_FUNC_5(Py_DataModeling::elementwisemul)) // elementwise mult + .def("__mul__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarmul, ftype)) + .def("__rmul__", WRAP_FREE_FUNC_6(Py_DataModeling::rscalarmul, ftype)) + + .def("__sub__", WRAP_FREE_FUNC_4(Py_DataModeling::scalarsub, ftype)) + .def("__truediv__", WRAP_FREE_FUNC_4(Py_DataModeling::scalardiv, ftype)) + + // member functions + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_1(Tensor::get)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_2(Tensor::get)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_3(Tensor::get)) + .def("getitem", WRAP_FUNC_AND_CONVERT_DTYPE_4(Tensor::get)) + .def("getitem", Py_DataModeling::getItemVector) // the vector arg + + .def("sum", WRAP_FREE_FUNC_7(&(cgraph::sumTensor))) + + .def("reset", Py_DataModeling::reset1) + .def("reset", Py_DataModeling::reset2) + + .def("hasGrads", &Tensor::hasGrads) + .def("hasGrads", +[](const std::shared_ptr& t) -> bool { + return t->hasGrads(); + }) + + .def("transpose", WRAP_FREE_MEMBER_FUNC_1(Py_DataModeling::transpose1, int, int)) + .def("transpose", WRAP_FREE_MEMBER_FUNC_2(Py_DataModeling::transpose2, int, int, bool)) + .def("transposeThis", Py_DataModeling::transposeThis1) + .def("transposeThis", Py_DataModeling::transposeThis2) + + .def("backward", &Tensor::backward) + ; + + // free functions + def("Ones", WRAP_FREE_FUNC_1(Py_DataModeling::Ones0, std::vector)); + def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones1, std::vector, Device)); + def("Ones", WRAP_FREE_FUNC_2(Py_DataModeling::Ones2, std::vector, const bool)); + def("Ones", WRAP_FREE_FUNC_3(Py_DataModeling::Ones3, std::vector, Device, const bool)); + + def("Zeros", WRAP_FREE_FUNC_1(Py_DataModeling::Zeros0, std::vector)); + def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros1, std::vector, Device)); + def("Zeros", WRAP_FREE_FUNC_2(Py_DataModeling::Zeros2, std::vector, const bool)); + def("Zeros", 
WRAP_FREE_FUNC_3(Py_DataModeling::Zeros3, std::vector, Device, const bool)); + + def("Gaussian", WRAP_FREE_FUNC_2(Py_DataModeling::Gaussian0, std::vector, ftype)); + def("Gaussian", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian1, std::vector, Device, ftype)); + def("Gaussian", WRAP_FREE_FUNC_3(Py_DataModeling::Gaussian2, std::vector, ftype, const bool)); + def("Gaussian", WRAP_FREE_FUNC_8(Py_DataModeling::Gaussian3, std::vector, Device, ftype, const bool)); +} \ No newline at end of file diff --git a/src/python/data_modeling/py_data_modeling_util.cpp b/src/python/py_core/py_core_util.cpp similarity index 87% rename from src/python/data_modeling/py_data_modeling_util.cpp rename to src/python/py_core/py_core_util.cpp index d495300..581c99f 100644 --- a/src/python/data_modeling/py_data_modeling_util.cpp +++ b/src/python/py_core/py_core_util.cpp @@ -1,5 +1,5 @@ /** - * @file py_data_modeling_util.cpp + * @file py_core_util.cpp * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) * @brief * @version 0.1 @@ -9,7 +9,7 @@ * */ -#include "py_data_modeling_util.h" +#include "py_core_util.h" #include #include @@ -22,7 +22,7 @@ ftype Py_DataModeling::tensorGetItem(const Tensor& self, boost::python::object i // Single integer index (1D) if(int_extractor.check()) { auto i0 = static_cast(int_extractor()); - return self.getItem(i0); + return self.get(i0); } // Tuple index (2D, 3D, or 4D, or list) @@ -32,25 +32,25 @@ ftype Py_DataModeling::tensorGetItem(const Tensor& self, boost::python::object i // Dispatch to convenience functions for 1-4 args if (len == 1) { auto i0 = static_cast(extract(index[0])); - return self.getItem(i0); + return self.get(i0); } else if (len == 2) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); - return self.getItem(i0, i1); + return self.get(i0, i1); } else if (len == 3) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); auto i2 = static_cast(extract(index[2])); - return 
self.getItem(i0, i1, i2); + return self.get(i0, i1, i2); } else if (len == 4) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); auto i2 = static_cast(extract(index[2])); auto i3 = static_cast(extract(index[3])); - return self.getItem(i0, i1, i2, i3); + return self.get(i0, i1, i2, i3); } else { // Arbitrary length - use vector version @@ -58,7 +58,7 @@ ftype Py_DataModeling::tensorGetItem(const Tensor& self, boost::python::object i for (int i = 0; i < len; ++i) { indices.push_back(static_cast(extract(index[i]))); } - return self.getItem(std::move(indices)); + return self.get(std::move(indices)); } } @@ -71,7 +71,7 @@ void Py_DataModeling::tensorSetItem(Tensor& self, boost::python::object index, f extract int_extractor(index); if(int_extractor.check()) { auto i0 = static_cast(int_extractor()); - self.setItem(value, i0); + self.set(value, i0); return; } @@ -83,25 +83,25 @@ void Py_DataModeling::tensorSetItem(Tensor& self, boost::python::object index, f // Dispatch to convenience functions for 1-4 args if (len == 1) { auto i0 = static_cast(extract(index[0])); - self.setItem(value, i0); + self.set(value, i0); } else if (len == 2) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); - self.setItem(value, i0, i1); + self.set(value, i0, i1); } else if (len == 3) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); auto i2 = static_cast(extract(index[2])); - self.setItem(value, i0, i1, i2); + self.set(value, i0, i1, i2); } else if (len == 4) { auto i0 = static_cast(extract(index[0])); auto i1 = static_cast(extract(index[1])); auto i2 = static_cast(extract(index[2])); auto i3 = static_cast(extract(index[3])); - self.setItem(value, i0, i1, i2, i3); + self.set(value, i0, i1, i2, i3); } else { // Arbitrary length - use vector version @@ -109,7 +109,7 @@ void Py_DataModeling::tensorSetItem(Tensor& self, boost::python::object index, f for (int i = 0; i < len; ++i) { 
indices.push_back(static_cast(extract(index[i]))); } - self.setItem(value, std::move(indices)); + self.set(value, std::move(indices)); } return; } diff --git a/src/python/py_core/py_core_util.h b/src/python/py_core/py_core_util.h new file mode 100644 index 0000000..7aa01d0 --- /dev/null +++ b/src/python/py_core/py_core_util.h @@ -0,0 +1,136 @@ +/** + * @file util.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief Helper and wrapper functions + * @version 0.1 + * @date 2026-02-21 + * + * @copyright Copyright (c) 2026 + * + */ + +#pragma once + +#include "data_modeling/dim_type.h" +#include "utility/initializers.h" + +#include "data_modeling/tensor.h" +#include "data_modeling/tensor_functions.h" +#include "computational_graph/tensor_ops/graph_creation.h" + +#include +#include + +#include + +namespace Py_DataModeling { + + /********************************************************************************************************* + ********************************************** Dimension ************************************************* + *********************************************************************************************************/ + + inline bool (Dimension::*dimEquals1)(const Dimension&) const = &Dimension::operator==; + inline bool (Dimension::*dimEquals2)(const std::vector&) const = &Dimension::operator==; + + inline bool (Dimension::*nDimEquals1)(const Dimension&) const = &Dimension::operator!=; + inline bool (Dimension::*nDimEquals2)(const std::vector&) const = &Dimension::operator!=; + /********************************************************************************************************* + *********************************************** Tensor *************************************************** + *********************************************************************************************************/ + + ftype tensorGetItem(const Tensor& self, boost::python::object index); + void tensorSetItem(Tensor& self, 
boost::python::object index, ftype value); + + // need wrappers for default arguments, see + // https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/functions.html + inline auto OnesWrapper0(std::vector dims) { + return TensorFunctions::Ones(std::move(dims)); + } + + inline auto OnesWrapper1(std::vector dims, Device d) { + return TensorFunctions::Ones(std::move(dims), d); + } + + inline auto ZerosWrapper0(std::vector dims) { + return TensorFunctions::Zeros(std::move(dims)); + } + + inline auto ZerosWrapper1(std::vector dims, Device d) { + return TensorFunctions::Zeros(std::move(dims), d); + } + + inline auto GaussianWrapper0(std::vector dims, ftype stddev) { + return TensorFunctions::Gaussian(std::move(dims), stddev); + } + + inline auto GaussianWrapper1(std::vector dims, Device d, ftype stddev) { + return TensorFunctions::Gaussian(std::move(dims), d, stddev); + } + + inline Tensor (*Ones0)(std::vector) = &OnesWrapper0; + inline Tensor (*Ones1)(std::vector, Device) = &OnesWrapper1; + inline Tensor (*Ones2)(std::vector, const bool) = &(TensorFunctions::Ones); + inline Tensor (*Ones3)(std::vector, Device, const bool) = &(TensorFunctions::Ones); + + inline Tensor (*Zeros0)(std::vector) = &ZerosWrapper0; + inline Tensor (*Zeros1)(std::vector, Device) = &ZerosWrapper1; + inline Tensor (*Zeros2)(std::vector, const bool) = &(TensorFunctions::Zeros); + inline Tensor (*Zeros3)(std::vector, Device, const bool) = &(TensorFunctions::Zeros); + + inline Tensor (*Gaussian0)(std::vector, ftype) = &GaussianWrapper0; + inline Tensor (*Gaussian1)(std::vector, Device, ftype) = &GaussianWrapper1; + inline Tensor (*Gaussian2)(std::vector, ftype, const bool) = &(TensorFunctions::Gaussian); + inline Tensor (*Gaussian3)(std::vector, Device, ftype, const bool) = &(TensorFunctions::Gaussian); + + inline void (Tensor::*reset1)(const ftype) = &Tensor::reset; + inline void (Tensor::*reset2)(const std::shared_ptr) = &Tensor::reset; + + inline void 
(Tensor::*transposeThis1)() = &Tensor::transposeThis; + inline void (Tensor::*transposeThis2)(int, int) = &Tensor::transposeThis; + inline Tensor (Tensor::*transpose1)(int, int) const = &Tensor::transpose; + inline Tensor (Tensor::*transpose2)(int, int, bool) const = &Tensor::transpose; + + inline ftype (Tensor::*getItemVector)(const std::vector&) const = &Tensor::get; + + /********************************************************************************************************* + ***************************************** Graph creation ************************************************* + *********************************************************************************************************/ + + // multiplications + inline std::shared_ptr (*elementwisemul) + (const std::shared_ptr left, const std::shared_ptr right) = &(cgraph::mul); + + inline std::shared_ptr (*scalarmul) + (const std::shared_ptr, ftype) = &(cgraph::mul); + + inline std::shared_ptr (*rscalarmul) + (ftype, const std::shared_ptr) = &(cgraph::mul); + + // additions + inline std::shared_ptr (*elementwiseadd) + (const std::shared_ptr left, const std::shared_ptr right) = &(cgraph::add); + + inline std::shared_ptr (*scalaradd) + (const std::shared_ptr, ftype) = &(cgraph::add); + + inline std::shared_ptr (*rscalaradd) + (ftype, const std::shared_ptr) = &(cgraph::add); + + // matmul + inline std::shared_ptr (*matmul) + (const std::shared_ptr left, const std::shared_ptr right) = &(cgraph::matmul); + + // sub, div + inline std::shared_ptr (*scalarsub) + (const std::shared_ptr, ftype) = &(cgraph::sub); + + inline std::shared_ptr (*scalardiv) + (const std::shared_ptr, ftype) = &(cgraph::div); + + // get + inline std::shared_ptr (*getItemAsTensor1) + (const std::shared_ptr& t, tensorSize_t idx) = &(cgraph::get); + + inline std::shared_ptr (*getItemAsTensor2) + (const std::shared_ptr& t, const std::vector& idx) = &(cgraph::get); +} \ No newline at end of file diff --git a/src/python/py_nn/py_nn.cpp 
b/src/python/py_nn/py_nn.cpp new file mode 100644 index 0000000..5eb175a --- /dev/null +++ b/src/python/py_nn/py_nn.cpp @@ -0,0 +1,89 @@ +/** + * @file layers.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-11-17 + * + * @copyright Copyright (c) 2025 + * + */ + +#include "py_nn_util.h" +#include "python_templates.h" +#include "custom_converters.h" +#include "utility/global_params.h" + +#include + +#include + +BOOST_PYTHON_MODULE(_nn) +{ + using namespace Py_Util; + + using namespace boost::python; + + #define WRAP_METHOD_ONE_TENSORARG(T, method) \ + +[](const T& self, Tensor& t) -> std::shared_ptr { \ + return (self.*method)(t.getSharedPtr()); \ + } + + #define WRAP_METHOD_TWO_TENSORARGS(T, method) \ + +[](const T& self, Tensor& t1, Tensor& t2) -> std::shared_ptr { \ + return (self.*method)(t1.getSharedPtr(), t2.getSharedPtr()); \ + } + + // register vector of shared_ptr converter; needed for ModuleBase::parameters() + class_>>("TensorList") + .def(vector_indexing_suite>>()) + ; + + // convert python list of tensors back to c++ + converter::registry::push_back( + &custom_converters::TensorListFromPython::convertible, + &custom_converters::TensorListFromPython::construct, + type_id>>()); + + // Networks + class_, boost::noncopyable>("_Module", no_init) + // methods + .def("_own_parameters", &module::ModuleBase::parameters) + // operators + .def("forward", pure_virtual(WRAP_METHOD_ONE_TENSORARG(Py_nn::ModuleBaseWrapper, Py_nn::moduleForward))) + .def("__str__", &toString) + ; + + class_, boost::noncopyable>("FfLayer", no_init) + // init + .def(init()) + .def(init()) + .def(init()) + .def(init()) + .def(init()) + .def(init()) + // methods + .add_property("dims", make_function(&module::FfLayer::getDims, return_internal_reference<>())) + .add_property("weights", &module::FfLayer::getWeights) + .add_property("bias", &module::FfLayer::getBias) + .add_property("params", &module::ModuleBase::parameters) + // 
operators + .def("__call__", WRAP_METHOD_ONE_TENSORARG(module::FfLayer, Py_nn::ffForward)) + .def("__str__", &toString) + ; + + class_, boost::noncopyable>("ReLU") + .def("__call__", WRAP_METHOD_ONE_TENSORARG(module::ReLu, Py_nn::reluF)) + .def("__str__", &toString) + ; + + class_, boost::noncopyable>("LeakyReLU", init()) + .def("__call__", WRAP_METHOD_ONE_TENSORARG(module::LeakyReLu, Py_nn::leakyReluF)) + .def("__str__", &toString) + ; + + class_, boost::noncopyable>("Softmax") + .def("__call__", WRAP_METHOD_ONE_TENSORARG(module::Softmax, Py_nn::softmaxF)) + .def("__str__", &toString) + ; +} \ No newline at end of file diff --git a/src/python/py_nn/py_nn_util.h b/src/python/py_nn/py_nn_util.h new file mode 100644 index 0000000..766589a --- /dev/null +++ b/src/python/py_nn/py_nn_util.h @@ -0,0 +1,54 @@ +/** + * @file layers.h + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2025-11-17 + * + * @copyright Copyright (c) 2025 + * + */ + +#pragma once + +#include "module/module_base.h" + +#include "module/layers/ff_layer.h" + +#include "module/activation_functions/relu.h" +#include "module/activation_functions/leaky_relu.h" +#include "module/activation_functions/softmax.h" + +#include +#include +#include +#include + +namespace Py_nn { + using namespace boost::python; + + /** + * @brief Wrapper class needed for Boost Python to get the virtual function working + * the way it is intended. 
See documentation here: + * https://beta.boost.org/doc/libs/develop/libs/python/doc/html/tutorial/tutorial/exposing.html + * + */ + struct ModuleBaseWrapper : module::ModuleBase, wrapper { + std::shared_ptr operator()(const std::shared_ptr& input) const override { + return this->get_override("forward")(input); + } + + Tensor operator()(const Tensor& input) const override { + std::__throw_runtime_error("This function should never be called from within Python"); + } + }; + + inline std::shared_ptr (ModuleBaseWrapper::*moduleForward)(const std::shared_ptr&) const = &ModuleBaseWrapper::operator(); + + inline std::shared_ptr (module::FfLayer::*ffForward)(const std::shared_ptr&) const = &module::FfLayer::operator(); + + inline std::shared_ptr (module::ReLu::*reluF)(const std::shared_ptr&) const = &module::ReLu::operator(); + inline std::shared_ptr (module::LeakyReLu::*leakyReluF)(const std::shared_ptr&) const = &module::LeakyReLu::operator(); + inline std::shared_ptr (module::Softmax::*softmaxF)(const std::shared_ptr&) const = &module::Softmax::operator(); +} + diff --git a/src/python/py_sys/py_sys.cpp b/src/python/py_sys/py_sys.cpp new file mode 100644 index 0000000..af7d905 --- /dev/null +++ b/src/python/py_sys/py_sys.cpp @@ -0,0 +1,24 @@ +/** + * @file py_sys.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-08 + * + * @copyright Copyright (c) 2026 + * + */ + + +#include "system/sys_functions.h" + +#include + +BOOST_PYTHON_MODULE(_sys) +{ + using namespace boost::python; + + def("setDevice", &sys::setDevice); + def("getDevice", &sys::getDevice); + def("setSeed", &sys::setRandomSeed); +} \ No newline at end of file diff --git a/src/python/py_train/py_train.cpp b/src/python/py_train/py_train.cpp new file mode 100644 index 0000000..9cbb23f --- /dev/null +++ b/src/python/py_train/py_train.cpp @@ -0,0 +1,66 @@ +/** + * @file py_train.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * 
@version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#include + +#include "utility/global_params.h" + +#include "training/loss_functions/bce_loss.h" +#include "training/loss_functions/bce_sigmoid_loss.h" +#include "training/loss_functions/crossentropy_loss.h" +#include "training/loss_functions/crossentropy_softmax_loss.h" + +#include "training/optimizers/sgd.h" +#include "training/optimizers/rmsprop.h" + +#include "training/trainers/base_train_loop.h" + +BOOST_PYTHON_MODULE(_train) +{ + using namespace boost::python; + + // Loss functions + class_, boost::noncopyable>("BCE") + .def("__call__", &train::BceLoss::operator()) + ; + + class_, boost::noncopyable>("BceWithSigmoid") + .def("__call__", &train::BceSigmoidLoss::operator()) + ; + + class_, boost::noncopyable>("CrossEntropy") + .def("__call__", &train::CrossEntropyLoss::operator()) + ; + + class_, boost::noncopyable>("CrossEntropyWithSoftmax") + .def("__call__", &train::CrossEntropySoftmaxLoss::operator()) + ; + + // Optimizers + class_, boost::noncopyable>("SGD", no_init) + .def(init >, ftype>()) + .def("step", &train::SgdOptimizer::step) + .def("zeroGrad", &train::SgdOptimizer::zeroGrad) + ; + + class_, boost::noncopyable>("RmsProp", no_init) + .def(init >, ftype, ftype>()) + .def("step", &train::RmsPropOptimizer::step) + .def("zeroGrad", &train::RmsPropOptimizer::zeroGrad) + ; + + // Trainers + class_, boost::noncopyable>("TrainLoop", no_init) + .def(init&, std::shared_ptr, + std::shared_ptr, size_t, tensorDim_t>()) + .def("run", &train::BaseTrainLoop::run) + ; +} \ No newline at end of file diff --git a/src/python/python_utility/custom_converters.h b/src/python/py_utility/custom_converters.h similarity index 69% rename from src/python/python_utility/custom_converters.h rename to src/python/py_utility/custom_converters.h index 5114d7f..68c1ea7 100644 --- a/src/python/python_utility/custom_converters.h +++ b/src/python/py_utility/custom_converters.h @@ -50,12 +50,70 @@ namespace 
custom_converters { static void* convertible(PyObject* obj_ptr); static void construct(PyObject* obj_ptr,rvalueFromPythonData* data); }; + + /** + * @brief Convert from Python list to std::vector> + */ + struct TensorListFromPython { + using rvalueFromPythonData = boost::python::converter::rvalue_from_python_stage1_data; + + static void* convertible(PyObject* obj); + static void construct(PyObject* obj, rvalueFromPythonData* data); + }; } +// TODO: do array instead of tensor +/* struct DimsFromPython { + static void* convertible(PyObject* obj) { + if (!PyTuple_Check(obj) && !PyList_Check(obj)) return nullptr; + return obj; + } + + static void construct(PyObject* obj, + bp::converter::rvalue_from_python_stage1_data* data) { + void* storage = ((bp::converter::rvalue_from_python_object_data*)data)->storage.bytes; + Dims* dims = new (storage) Dims(); + int len = PySequence_Length(obj); + dims->ndim = len; + for (int i = 0; i < len; i++) + dims->data[i] = bp::extract(PySequence_GetItem(obj, i)); + data->convertible = storage; + } +}; + +// register it in your module init: +bp::converter::registry::push_back( + &DimsFromPython::convertible, + &DimsFromPython::construct, + bp::type_id()); */ + /******************************************************************************************/ /******************************************************************************************/ /******************************************************************************************/ +void* custom_converters::TensorListFromPython::convertible(PyObject* obj) { + using namespace boost::python; + if (!PyList_Check(obj)) return nullptr; + return obj; +} + +void custom_converters::TensorListFromPython::construct(PyObject* obj, rvalueFromPythonData* data) { + using namespace boost::python; + void* storage = ((converter::rvalue_from_python_storage< std::vector> >*)data)->storage.bytes; + //void* storage = ((converter::rvalue_from_python_storage< std::vector >*)data)->storage.bytes; + + new 
(storage) std::vector>(); + auto* vec = reinterpret_cast>*>(storage); + + int len = PyList_Size(obj); + vec->reserve(len); + for (int i = 0; i < len; i++) { + vec->push_back(extract>( + PyList_GetItem(obj, i))); + } + data->convertible = storage; +} + template requires ( std::is_integral_v< T > || std::is_floating_point_v< T >) diff --git a/src/python/python_utility/python_templates.h b/src/python/py_utility/python_templates.h similarity index 52% rename from src/python/python_utility/python_templates.h rename to src/python/py_utility/python_templates.h index 54217d2..e0e625c 100644 --- a/src/python/python_utility/python_templates.h +++ b/src/python/py_utility/python_templates.h @@ -24,15 +24,4 @@ namespace Py_Util { oss << obj; return oss.str(); } - - /** - * @brief Because we manage tensors via shared_ptr, we need this to wrap - * return values when a function/method demands it. - */ - /* template - auto WrapReturnedTensor(Func f) { - return [f](const Tensor& self, auto&&... args) -> std::shared_ptr { - return std::make_shared(f(self, std::forward(args)...)); - }; - } */ } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3b258b9..c8ca76b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,7 +9,10 @@ FetchContent_MakeAvailable(googletest) add_executable(unit_tests_backend backend/test_data_modeling.cpp backend/test_computational_graph.cpp -) + backend/test_module.cpp + backend/test_losses.cpp + backend/test_train_loop.cpp + ) target_link_libraries(unit_tests_backend PRIVATE gtest_main # pre-built main, avoids boilerplate if no custom initialization needed diff --git a/tests/backend/test_computational_graph.cpp b/tests/backend/test_computational_graph.cpp index d2a686f..ef07e65 100644 --- a/tests/backend/test_computational_graph.cpp +++ b/tests/backend/test_computational_graph.cpp @@ -14,7 +14,7 @@ #include "data_modeling/tensor.h" #include "data_modeling/tensor_functions.h" -#include "computational_graph/graph_creation.h" +#include 
"computational_graph/tensor_ops/graph_creation.h" #include @@ -22,7 +22,7 @@ TEST(AutogradTest, ThrowsIfNoGradientSet) { auto t1 = TensorFunctions::makeSharedTensor({1}, {3.0}, false); auto t2 = TensorFunctions::makeSharedTensor({1}, {2.0}, false); - auto loss = graph::add(t1, t2); + auto loss = cgraph::add(t1, t2); EXPECT_THROW(loss->backward(), std::runtime_error); } @@ -31,37 +31,67 @@ TEST(AutogradTest, SimpleAddition) { auto t1 = TensorFunctions::makeSharedTensor({1}, {3.0}, true); auto t2 = TensorFunctions::makeSharedTensor({1}, {2.0}, true); - auto t3 = graph::add(t1, t2); - auto loss = graph::mul(t3, t3); + auto t3 = cgraph::add(t1, t2); + auto loss = cgraph::mul(t3, t3); loss->backward(); - EXPECT_NEAR(t1->getGrads()->getItem(0), 10.0, 1e-5); - EXPECT_NEAR(t2->getGrads()->getItem(0), 10.0, 1e-5); + EXPECT_NEAR(t1->getGrads()->get(0), 10.0, 1e-5); + EXPECT_NEAR(t2->getGrads()->get(0), 10.0, 1e-5); +} + +TEST(AutogradTest, BroadcastAdd) { + // gradient of broadcast add w.r.t. bias should be sum over batch dimension + // upstream grad: (2,3) of ones → bias grad should be (3) of twos + auto t1 = TensorFunctions::makeSharedTensor({2, 3}, + {1.0, 2.0, 3.0, + 4.0, 5.0, 6.0}, true); + auto bias = TensorFunctions::makeSharedTensor({3}, + {0.0, 0.0, 0.0}, true); + + auto res = cgraph::add(t1, bias); + + // set upstream grad to ones and backprop + auto upstreamGrad = TensorFunctions::makeSharedTensor({2, 3}, + {1.0, 1.0, 1.0, + 1.0, 1.0, 1.0}, false); + res->backward(); + + // bias grad should be sum over batch: [2, 2, 2] + auto biasGrad = bias->getGrads(); + ASSERT_DOUBLE_EQ((*biasGrad)[0], 2.0); + ASSERT_DOUBLE_EQ((*biasGrad)[1], 2.0); + ASSERT_DOUBLE_EQ((*biasGrad)[2], 2.0); + + // t1 grad should be ones (add is identity for non-broadcast operand) + auto t1Grad = t1->getGrads(); + for(int i = 0; i < 6; i++) { + ASSERT_DOUBLE_EQ((*t1Grad)[i], 1.0); + } } TEST(AutogradTest, ScalarMultiplication) { auto t1 = TensorFunctions::makeSharedTensor({1}, {2.0}, true); auto 
t2 = TensorFunctions::makeSharedTensor({1}, {3.0}, true); - auto t3 = graph::mul(t1, t2); - auto loss = graph::mul(t3, t3); + auto t3 = cgraph::mul(t1, t2); + auto loss = cgraph::mul(t3, t3); loss->backward(); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem(0), 36.0); - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem(0), 24.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get(0), 36.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get(0), 24.0); } TEST(AutogradTest, MatMul) { auto t1 = TensorFunctions::makeSharedTensor({2, 3}, {1, 2, 3, 4, 5, 6}, true); auto t2 = TensorFunctions::makeSharedTensor({3, 2}, {1, 2, 3, 4, 5, 6}, true); - auto t3 = graph::matmul(t1, t2); + auto t3 = cgraph::matmul(t1, t2); auto loss = TensorFunctions::makeSharedTensor({1}, {0.0}, true); for (size_t i = 0; i < t3->getSize(); ++i) { - loss = graph::add(loss, graph::get(t3, i)); + loss = cgraph::add(loss, cgraph::get(t3, i)); } loss->backward(); @@ -70,65 +100,51 @@ TEST(AutogradTest, MatMul) { EXPECT_TRUE(t2->hasGrads()); // dL/dt1 = dloss/dt3 @ t2^t = Ones({2, 2}) @ t2^t - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 0}), 3.0); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 1}), 7.0); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({0, 2}), 11.0); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 0}), 3.0); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 1}), 7.0); - ASSERT_DOUBLE_EQ(t1->getGrads()->getItem({1, 2}), 11.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({0, 0}), 3.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({0, 1}), 7.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({0, 2}), 11.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({1, 0}), 3.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({1, 1}), 7.0); + ASSERT_DOUBLE_EQ(t1->getGrads()->get({1, 2}), 11.0); // dL/dt2 = t1^t @ dloss/dt3 = t1^t @ Ones({2, 2}) - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({0, 0}), 5.0); - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({0, 1}), 5.0); - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({1, 0}), 7.0); - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({1, 1}), 7.0); - 
ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({2, 0}), 9.0); - ASSERT_DOUBLE_EQ(t2->getGrads()->getItem({2, 1}), 9.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({0, 0}), 5.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({0, 1}), 5.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({1, 0}), 7.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({1, 1}), 7.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({2, 0}), 9.0); + ASSERT_DOUBLE_EQ(t2->getGrads()->get({2, 1}), 9.0); } TEST(AutogradTest, ChainRule) { auto x = TensorFunctions::makeSharedTensor({1}, {2.0}, true); - auto y = graph::mul(x, x); // y = x^2 - auto z = graph::add(x, y); // z = x^2 + x - auto loss = graph::mul(z, z); // loss = (x^2 + x)^2 + auto y = cgraph::mul(x, x); // y = x^2 + auto z = cgraph::add(x, y); // z = x^2 + x + auto loss = cgraph::mul(z, z); // loss = (x^2 + x)^2 loss->backward(); // dloss/dx = 2(x^2 + x) * (2x + 1) // At x=2: 2(4 + 2) * (4 + 1) = 2 * 6 * 5 = 60 - ASSERT_DOUBLE_EQ(x->getGrads()->getItem(0), 60.0); + ASSERT_DOUBLE_EQ(x->getGrads()->get(0), 60.0); } TEST(AutogradTest, MultiVariateChainRule) { auto x = TensorFunctions::makeSharedTensor({2}, {1.0, 2.0}, true); - auto y = graph::mul(x, 3.0); // y = [3, 6] + auto y = cgraph::mul(x, 3.0); // y = [3, 6] auto loss = TensorFunctions::makeSharedTensor({1}, {0.0}, true); for(int i=0; igetSize(); i++){ - loss = graph::add(loss, graph::get(y, i)); + loss = cgraph::add(loss, cgraph::get(y, i)); } // loss = 9 loss->backward(); // dloss/dx = scalar = 3 - ASSERT_DOUBLE_EQ(x->getGrads()->getItem(0), 3.0); - ASSERT_DOUBLE_EQ(x->getGrads()->getItem(1), 3.0); - - ASSERT_DOUBLE_EQ(y->getGrads()->getItem(0), 1.0); - ASSERT_DOUBLE_EQ(y->getGrads()->getItem(1), 1.0); -} + ASSERT_DOUBLE_EQ(x->getGrads()->get(0), 3.0); + ASSERT_DOUBLE_EQ(x->getGrads()->get(1), 3.0); -/* TEST(AutogradTest, ReLU) { - Tensor x({3}, {-1.0, 0.0, 2.0}, true); - - Tensor y = relu(x); // [0, 0, 2] - Tensor loss = sum(y); // loss = 2 - - loss.backward(); - - // Gradient: [0, 0, 1] (only where input > 0) - 
EXPECT_NEAR(t.getGrads()->getItem(0), 0.0, 1e-5); - EXPECT_NEAR(t.getGrads()->getItem(1), 0.0, 1e-5); - EXPECT_NEAR(t.getGrads()->getItem(2), 1.0, 1e-5); -} */ \ No newline at end of file + ASSERT_DOUBLE_EQ(y->getGrads()->get(0), 1.0); + ASSERT_DOUBLE_EQ(y->getGrads()->get(1), 1.0); +} \ No newline at end of file diff --git a/tests/backend/test_data_modeling.cpp b/tests/backend/test_data_modeling.cpp index 3bff63f..69f585b 100644 --- a/tests/backend/test_data_modeling.cpp +++ b/tests/backend/test_data_modeling.cpp @@ -23,10 +23,10 @@ TEST(TensorOpsTest, TestCtor) { ASSERT_EQ(t.getDevice(), Device::CPU); ASSERT_TRUE(!t.getRequiresGrad()); - ASSERT_DOUBLE_EQ(t.getItem(0, 0), 2.0); - ASSERT_DOUBLE_EQ(t.getItem(0, 1), 3.0); - ASSERT_DOUBLE_EQ(t.getItem(1, 0), 4.0); - ASSERT_DOUBLE_EQ(t.getItem(1, 1), 5.0); + ASSERT_DOUBLE_EQ(t.get(0, 0), 2.0); + ASSERT_DOUBLE_EQ(t.get(0, 1), 3.0); + ASSERT_DOUBLE_EQ(t.get(1, 0), 4.0); + ASSERT_DOUBLE_EQ(t.get(1, 1), 5.0); } TEST(TensorOpsTest, ScalarAddWorks) { @@ -35,9 +35,9 @@ TEST(TensorOpsTest, ScalarAddWorks) { auto res = t1 + 1.5; constexpr ftype sum = 2.5; - for(auto i=0; i + +#include "data_modeling/tensor_functions.h" + +#include "training/loss_functions/rmse_loss.h" +#include "training/loss_functions/bce_loss.h" +#include "training/loss_functions/crossentropy_loss.h" + +#include + +using namespace train; + +static constexpr ftype kTol = 1e-4f; + +// ─── CrossEntropy ──────────────────────────────────────────────────────────── + +TEST(LossTest, CrossEntropyFoward) { + auto y = TensorFunctions::makeSharedTensor( + {2, 3}, {1.0, 0.0, 0.0, + 0.0, 1.0, 0.0}, false); + + auto ypred = TensorFunctions::makeSharedTensor( + {2, 3}, {0.7, 0.2, 0.1, + 0.1, 0.8, 0.1}, true); + + CrossEntropyLoss loss; + auto result = loss(y, ypred); + + // expected: -( log(0.7) + log(0.8) ) / 2 = 0.2899 + const ftype expected = -(std::log(0.7f) + std::log(0.8f)) / 2.0f; + EXPECT_NEAR((*result)[0], expected, kTol); +} + +TEST(LossTest, 
CrossEntropyPerfectPrediction) { + auto y = TensorFunctions::makeSharedTensor( + {2, 3}, {1.0, 0.0, 0.0, + 0.0, 1.0, 0.0}, false); + + // near-perfect predictions — can't use exactly 1.0 due to log(0) + auto ypred = TensorFunctions::makeSharedTensor( + {2, 3}, {0.999, 0.0005, 0.0005, + 0.0005, 0.999, 0.0005}, true); + + CrossEntropyLoss loss; + auto result = loss(y, ypred); + + // loss should be very small + EXPECT_LT((*result)[0], 0.01f); +} + +TEST(LossTest, CrossEntropyUniformPrediction) { + // uniform prediction should give log(3) ~ 1.0986 + auto y = TensorFunctions::makeSharedTensor( + {1, 3}, {1.0, 0.0, 0.0}, false); + + auto ypred = TensorFunctions::makeSharedTensor( + {1, 3}, {1.0f/3, 1.0f/3, 1.0f/3}, true); + + CrossEntropyLoss loss; + auto result = loss(y, ypred); + + EXPECT_NEAR((*result)[0], std::log(3.0f), kTol); +} + +TEST(LossTest, CrossEntropyThrowsOnDimMismatch) { + auto y = TensorFunctions::makeSharedTensor( + {2, 3}, {1.0, 0.0, 0.0, 0.0, 1.0, 0.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {2, 2}, {0.5, 0.5, 0.5, 0.5}, true); + + CrossEntropyLoss loss; + EXPECT_THROW(loss(y, ypred), std::invalid_argument); +} + +TEST(LossTest, CrossEntropyBackward) { + // y = [[1,0,0],[0,1,0]], ypred = [[0.7,0.2,0.1],[0.1,0.8,0.1]] + // grad CE w.r.t. 
ypred[b,i] = -y[b,i] / (ypred[b,i] * n) + // grad[0,0] = -1/(0.7*2) = -0.7143 + // grad[0,1] = 0 + // grad[0,2] = 0 + // grad[1,0] = 0 + // grad[1,1] = -1/(0.8*2) = -0.625 + // grad[1,2] = 0 + auto y = TensorFunctions::makeSharedTensor( + {2, 3}, {1.0, 0.0, 0.0, + 0.0, 1.0, 0.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {2, 3}, {0.7, 0.2, 0.1, + 0.1, 0.8, 0.1}, true); + + CrossEntropyLoss loss; + auto result = loss(y, ypred); + result->backward(); + + auto grads = ypred->getGrads(); + EXPECT_NEAR((*grads)[0], -0.7143f, kTol); + EXPECT_NEAR((*grads)[1], 0.0f, kTol); + EXPECT_NEAR((*grads)[2], 0.0f, kTol); + EXPECT_NEAR((*grads)[3], 0.0f, kTol); + EXPECT_NEAR((*grads)[4], -0.625f, kTol); + EXPECT_NEAR((*grads)[5], 0.0f, kTol); +} + +// ─── BCE ───────────────────────────────────────────────────────────────────── + +TEST(LossTest, BceForward) { + auto y = TensorFunctions::makeSharedTensor( + {4, 1}, {0.0, 1.0, 1.0, 0.0}, false); + + auto ypred = TensorFunctions::makeSharedTensor( + {4, 1}, {0.1, 0.9, 0.8, 0.2}, true); + + BceLoss loss; + auto result = loss(y, ypred); + + // expected: -( log(0.9) + log(0.9) + log(0.8) + log(0.8) ) / 4 = 0.1643 + const ftype expected = -(std::log(0.9f) + std::log(0.9f) + + std::log(0.8f) + std::log(0.8f)) / 4.0f; + EXPECT_NEAR((*result)[0], expected, kTol); +} + +TEST(LossTest, BcePerfectPrediction) { + auto y = TensorFunctions::makeSharedTensor( + {2, 1}, {1.0, 0.0}, false); + + auto ypred = TensorFunctions::makeSharedTensor( + {2, 1}, {0.999, 0.001}, true); + + BceLoss loss; + auto result = loss(y, ypred); + + EXPECT_LT((*result)[0], 0.01f); +} + +TEST(LossTest, BceRandomPrediction) { + // ypred = 0.5 for all -> loss = log(2) ~ 0.6931 + auto y = TensorFunctions::makeSharedTensor( + {2, 1}, {1.0, 0.0}, false); + + auto ypred = TensorFunctions::makeSharedTensor( + {2, 1}, {0.5, 0.5}, true); + + BceLoss loss; + auto result = loss(y, ypred); + + EXPECT_NEAR((*result)[0], std::log(2.0f), kTol); +} + +TEST(LossTest, 
BceThrowsOnDimMismatch) { + auto y = TensorFunctions::makeSharedTensor( + {2, 1}, {1.0, 0.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {3, 1}, {0.5, 0.5, 0.5}, true); + + BceLoss loss; + EXPECT_THROW(loss(y, ypred), std::invalid_argument); +} + +TEST(LossTest, BceNoInfOrNanOnNearZeroPred) { + auto y = TensorFunctions::makeSharedTensor( + {1, 1}, {1.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {1, 1}, {0.0}, true); + + BceLoss loss; + auto result = loss(y, ypred); + + // clipping prevents log(0) + EXPECT_FALSE(std::isinf((*result)[0])); +} + +TEST(LossTest, BceBackward) { + // y = [1, 0], ypred = [0.8, 0.3] + // grad BCE w.r.t. ypred_i = (-y/ypred + (1-y)/(1-ypred)) / n + // grad[0] = (-1/0.8 + 0) / 2 = -0.625 + // grad[1] = (0 + 1/0.7) / 2 = 0.7143 + auto y = TensorFunctions::makeSharedTensor( + {2, 1}, {1.0, 0.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {2, 1}, {0.8, 0.3}, true); + + BceLoss loss; + auto result = loss(y, ypred); + result->backward(); + + auto grads = ypred->getGrads(); + EXPECT_NEAR((*grads)[0], -0.625f, kTol); + EXPECT_NEAR((*grads)[1], 0.7143f, kTol); +} + +TEST(LossTest, RmseForward) { + // y = [1, 2, 3], ypred = [1.5, 2.5, 2.5] + // diffs = [-0.5, -0.5, 0.5] + // MSE = (0.25 + 0.25 + 0.25) / 3 = 0.25 + // RMSE = 0.5 + auto y = TensorFunctions::makeSharedTensor( + {3}, {1.0, 2.0, 3.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {3}, {1.5, 2.5, 2.5}, true); + + auto loss = RmseLoss{}; + auto result = loss(y, ypred); + + EXPECT_NEAR((*result)[0], 0.5f, kTol); +} + +TEST(LossTest, RmsePerfectPrediction) { + auto y = TensorFunctions::makeSharedTensor( + {3}, {1.0, 2.0, 3.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {3}, {1.0, 2.0, 3.0}, true); + + RmseLoss loss; + auto result = loss(y, ypred); + + EXPECT_NEAR((*result)[0], 0.0f, kTol); +} + +TEST(LossTest, RmseBackward) { + // y = [1, 0], ypred = [0.5, 0.5] + // diffs = [0.5, -0.5], MSE = 0.25, RMSE = 0.5 
+ // grad_i = -(y_i - ypred_i) / (n * RMSE) + // grad[0] = -(1 - 0.5) / (2 * 0.5) = -0.5 + // grad[1] = -(0 - 0.5) / (2 * 0.5) = 0.5 + auto y = TensorFunctions::makeSharedTensor( + {2}, {1.0, 0.0}, false); + auto ypred = TensorFunctions::makeSharedTensor( + {2}, {0.5, 0.5}, true); + + RmseLoss loss; + auto result = loss(y, ypred); + result->backward(); + + auto grads = ypred->getGrads(); + EXPECT_NEAR((*grads)[0], -0.5f, kTol); + EXPECT_NEAR((*grads)[1], 0.5f, kTol); +} \ No newline at end of file diff --git a/tests/backend/test_module.cpp b/tests/backend/test_module.cpp new file mode 100644 index 0000000..a74c88c --- /dev/null +++ b/tests/backend/test_module.cpp @@ -0,0 +1,239 @@ +/** + * @file test_module.cpp + * @author Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-09 + * + * @copyright Copyright (c) 2026 + * + */ + +#include + +#include "module/layers/ff_layer.h" + +#include "module/activation_functions/relu.h" +#include "module/activation_functions/leaky_relu.h" +#include "module/activation_functions/softmax.h" +#include "module/activation_functions/sigmoid.h" + +#include "data_modeling/tensor_functions.h" +#include "computational_graph/tensor_ops/graph_creation.h" + +#include + +constexpr ftype delta = 1e-3; + +TEST(ActivationTest, ReluForward) { + auto t1 = TensorFunctions::Ones({3, 2}, false); + auto f = module::ReLu(); + + auto res = f(t1); + + for(size_t i=0; ibackward(); + + // Gradient: [0, 0, 1] (only where input > 0) + ASSERT_DOUBLE_EQ(x->getGrads()->get(0), 0.0); + ASSERT_DOUBLE_EQ(x->getGrads()->get(1), 0.0); + ASSERT_DOUBLE_EQ(x->getGrads()->get(2), 1.0); +} + +TEST(ActivationTest, LeakyReluForward) { + auto t1 = TensorFunctions::Ones({3, 2}, false); + + auto f = module::LeakyReLu(0.3); + auto res = f(t1); + + for(size_t i=0; ibackward(); + + // Gradient: [eps, eps, 1] (leaky slope eps where input <= 0) + ASSERT_DOUBLE_EQ(x->getGrads()->get(0), eps); + ASSERT_DOUBLE_EQ(x->getGrads()->get(1), eps); // by convention + 
ASSERT_DOUBLE_EQ(x->getGrads()->get(2), 1.0); +} + +TEST(ActivationTest, SigmoidForward) { + // sigmoid(0) = 0.5, sigmoid(1) = 0.7311, sigmoid(-1) = 0.2689 + auto t = Tensor({3}, {0.0, 1.0, -1.0}, true); + + module::Sigmoid sig; + auto res = sig(t); + + EXPECT_NEAR(res[0], 0.5, delta); + EXPECT_NEAR(res[1], 0.7311, delta); + EXPECT_NEAR(res[2], 0.2689, delta); +} + +TEST(ActivationTest, SigmoidLargePositive) { + // sigmoid(100) should be ~1, not inf or nan + auto t = Tensor({1}, {100.0}, true); + + module::Sigmoid sig; + auto res = sig(t); + + EXPECT_NEAR(res[0], 1.0, delta); + EXPECT_FALSE(std::isnan(res[0])); + EXPECT_FALSE(std::isinf(res[0])); +} + +TEST(ActivationTest, SigmoidLargeNegative) { + // sigmoid(-100) should be ~0, not nan + auto t = Tensor({1}, {-100.0}, true); + + module::Sigmoid sig; + auto res = sig(t); + + EXPECT_NEAR(res[0], 0.0, delta); + EXPECT_FALSE(std::isnan(res[0])); + EXPECT_FALSE(std::isinf(res[0])); +} + +TEST(AutogradTest, SigmoidBackward) { + // grad of sigmoid = sigmoid(x) * (1 - sigmoid(x)) + // for x=0: grad = 0.5 * 0.5 = 0.25 + // for x=1: grad = 0.7311 * 0.2689 = 0.1966 + auto t = TensorFunctions::makeSharedTensor( + {2}, {0.0, 1.0}, true); + + module::Sigmoid sig; + auto res = sig(t); + res->backward(); + + auto grads = t->getGrads(); + EXPECT_NEAR((*grads)[0], 0.25, delta); + EXPECT_NEAR((*grads)[1], 0.1966, delta); +} + +TEST(ActivationTest, SoftmaxForward) { + // softmax([1, 2, 3]) + // exp([1,2,3]) = [2.7183, 7.3891, 20.0855] + // sum = 30.1929 + // softmax = [0.0900, 0.2447, 0.6652] + auto t = Tensor({1, 3}, {1.0, 2.0, 3.0}, true); + + module::Softmax sm; + auto res = sm(t); + + EXPECT_NEAR(res[0], 0.0900, delta); + EXPECT_NEAR(res[1], 0.2447, delta); + EXPECT_NEAR(res[2], 0.6652, delta); +} + +TEST(ActivationTest, SoftmaxSumsToOne) { + auto t = Tensor({2, 4}, + {1.0, 2.0, 3.0, 4.0, + 2.0, 1.0, 4.0, 3.0}, + true); + + module::Softmax sm; + auto res = sm(t); + + // each row must sum to 1 + ftype row0sum = res[0] + res[1] + 
res[2] + res[3]; + ftype row1sum = res[4] + res[5] + res[6] + res[7]; + EXPECT_NEAR(row0sum, 1.0, delta); + EXPECT_NEAR(row1sum, 1.0, delta); +} + +TEST(ActivationTest, SoftmaxForwardNumericalStability) { + // large values should not produce nan or inf + auto t = Tensor({1, 3}, {100.0, 101.0, 102.0}, true); + + module::Softmax sm; + auto res = sm(t); + + for(int i = 0; i < 3; i++) { + EXPECT_FALSE(std::isnan(res[i])); + EXPECT_FALSE(std::isinf(res[i])); + } + ftype rowsum = res[0] + res[1] + res[2]; + EXPECT_NEAR(rowsum, 1.0, delta); +} + +TEST(AutogradTest, SoftmaxBackward) { + // for softmax with upstream grad of ones, the gradient is zero + // because d/dx_i sum(softmax(x)) = 0 (softmax sums to 1 always) + // more useful: upstream = [1, 0, 0] + // grad[i] = softmax[i] * (upstream[i] - dot(upstream, softmax)) + // for x=[1,2,3], softmax=[0.09, 0.2447, 0.6652] + // dot([1,0,0], softmax) = 0.09 + // grad[0] = 0.09 * (1 - 0.09) = 0.0819 + // grad[1] = 0.2447 * (0 - 0.09) = -0.0220 + // grad[2] = 0.6652 * (0 - 0.09) = -0.0599 + auto t = TensorFunctions::makeSharedTensor( + {1, 3}, {1.0, 2.0, 3.0}, true); + + module::Softmax sm; + auto resPtr = sm(t); + + // set upstream gradient to [1, 0, 0] + auto upstream = TensorFunctions::makeSharedTensor( + {1, 3}, {1.0, 0.0, 0.0}, false); + resPtr->setGrads(upstream); + resPtr->backward(); + + auto grads = t->getGrads(); + EXPECT_NEAR((*grads)[0], 0.0819, delta); + EXPECT_NEAR((*grads)[1], -0.0220, delta); + EXPECT_NEAR((*grads)[2], -0.0599, delta); +} + +TEST(LayerTest, TestFfLayer) { + auto t1 = TensorFunctions::Ones({3, 2}, false); + auto layer = module::FfLayer(2, 1, true, false); + + auto res = layer(t1); + + ASSERT_EQ(res.getDims(), Dimension({3, 1})); +} \ No newline at end of file diff --git a/tests/backend/test_train_loop.cpp b/tests/backend/test_train_loop.cpp new file mode 100644 index 0000000..247b9b0 --- /dev/null +++ b/tests/backend/test_train_loop.cpp @@ -0,0 +1,234 @@ +/** + * @file test_train_loop.cpp + * @author 
Robert Baumgartner (r.baumgartner-1@tudelft.nl) + * @brief + * @version 0.1 + * @date 2026-03-14 + * + * @copyright Copyright (c) 2026 + * + */ + +#include + +#include "module/networks/sequential.h" +#include "module/layers/ff_layer.h" + +#include "module/activation_functions/sigmoid.h" +#include "module/activation_functions/relu.h" +#include "module/activation_functions/leaky_relu.h" +#include "module/activation_functions/softmax.h" + +#include "training/optimizers/sgd.h" +#include "training/optimizers/rmsprop.h" + +#include "training/loss_functions/bce_loss.h" +#include "training/loss_functions/crossentropy_loss.h" +#include "training/loss_functions/bce_sigmoid_loss.h" +#include "training/loss_functions/crossentropy_softmax_loss.h" + +#include "training/trainers/base_train_loop.h" + +#include "data_modeling/tensor_functions.h" + +#include "system/sys_functions.h" + +using namespace std; + +static shared_ptr makeBinaryNet() { + auto net = make_shared(); + + net->append(make_shared(2, 4, true, true)); + + net->append(make_shared(0.01)); + + net->append(make_shared(4, 1, true, true)); + + net->append(make_shared()); + return net; +} + +static shared_ptr makeBinaryNet2() { + auto net = make_shared(); + + net->append(make_shared(2, 4, true, true)); + + net->append(make_shared(0.01)); + + net->append(make_shared(4, 1, true, true)); + + return net; +} + +static shared_ptr makeMulticlassNet() { + auto net = make_shared(); + + net->append(make_shared(2, 8, true, true)); + + net->append(make_shared(0.01)); + + net->append(make_shared(8, 3, true, true)); + + net->append(make_shared()); + return net; +} + +static shared_ptr makeMulticlassNet2() { + auto net = make_shared(); + + net->append(make_shared(2, 8, true, true)); + + net->append(make_shared(0.01)); + + net->append(make_shared(8, 3, true, true)); + + return net; +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + sys::setRandomSeed(42); + return RUN_ALL_TESTS(); +} + +TEST(OverfitTest, 
BceSgdOverfitsSmallDataset) { + // XOR-like: 4 samples, 2 features, binary labels + auto x = TensorFunctions::makeSharedTensor( + {4, 2}, {0.0, 0.0, + 0.0, 1.0, + 1.0, 0.0, + 1.0, 1.0}, false); + + auto y = TensorFunctions::makeSharedTensor( + {4, 1}, {0.0, + 1.0, + 1.0, + 0.0}, false); + + auto net = makeBinaryNet(); + auto loss = make_shared(); + auto optim = make_shared( + net->parameters(), /*lr=*/0.05); + + auto trainLoop = train::BaseTrainLoop( + net, loss, optim, /*epochs=*/2000, /*bsize=*/static_cast(4)); + + trainLoop.run(x, y, /*shuffle=*/false, /*verbose=*/false); + + // forward one more time to get final loss + auto pred = (*net)(x); + auto finalLoss = (*loss)(y, pred); + + EXPECT_LT((*finalLoss)[0], 0.05f) + << "Network failed to overfit binary dataset\n" + << "Final prediction: " << *pred << "\nFinal loss: " << *finalLoss; +} + +TEST(OverfitTest, BceSgdOverfitsSmallDataset_OptimizedLoss) { + // XOR-like: 4 samples, 2 features, binary labels + auto x = TensorFunctions::makeSharedTensor( + {4, 2}, {0.0, 0.0, + 0.0, 1.0, + 1.0, 0.0, + 1.0, 1.0}, false); + + auto y = TensorFunctions::makeSharedTensor( + {4, 1}, {0.0, + 1.0, + 1.0, + 0.0}, false); + + auto net = makeBinaryNet2(); + auto loss = make_shared(); + auto optim = make_shared( + net->parameters(), /*lr=*/0.05); + + auto trainLoop = train::BaseTrainLoop( + net, loss, optim, /*epochs=*/2000, /*bsize=*/static_cast(4)); + + trainLoop.run(x, y, /*shuffle=*/false, /*verbose=*/false); + + // forward one more time to get final loss + auto pred = (*net)(x); + auto finalLoss = (*loss)(y, pred); + + auto sigmoid = module::Sigmoid(); + EXPECT_LT((*finalLoss)[0], 0.05f) + << "Network failed to overfit binary dataset\n" + << "Final prediction: " << sigmoid(*pred) << "\nFinal loss: " << *finalLoss; +} + +TEST(OverfitTest, CrossEntropyRMSPropOverfitsSmallDataset) { + // 6 samples, 2 features, 3 classes + auto x = TensorFunctions::makeSharedTensor( + {6, 2}, {1.0, 0.0, + 1.0, 0.1, + 0.0, 1.0, + 0.1, 1.0, + 0.5, 
0.5, + 0.4, 0.6}, false); + + // one-hot encoded labels + auto y = TensorFunctions::makeSharedTensor( + {6, 3}, {1.0, 0.0, 0.0, + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + 0.0, 0.0, 1.0}, false); + + auto net = makeMulticlassNet(); + auto loss = make_shared(); + auto optim = make_shared( + net->parameters(), /*lr=*/0.0001, /*decay=*/0.95); + + auto trainLoop = train::BaseTrainLoop( + net, loss, optim, /*epochs=*/2000, /*bsize=*/6); + + trainLoop.run(x, y, /*shuffle=*/false, /*verbose=*/false); + + auto pred = (*net)(x); + auto finalLoss = (*loss)(y, pred); + + EXPECT_LT((*finalLoss)[0], 0.05f) + << "Network failed to overfit multiclass dataset" + << "Final prediction: " << *pred << "\nFinal loss: " << *finalLoss; +} + +TEST(OverfitTest, CrossEntropyRMSPropOverfitsSmallDataset_OptimizedLoss) { + // 6 samples, 2 features, 3 classes + auto x = TensorFunctions::makeSharedTensor( + {6, 2}, {1.0, 0.0, + 1.0, 0.1, + 0.0, 1.0, + 0.1, 1.0, + 0.5, 0.5, + 0.4, 0.6}, false); + + // one-hot encoded labels + auto y = TensorFunctions::makeSharedTensor( + {6, 3}, {1.0, 0.0, 0.0, + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + 0.0, 0.0, 1.0}, false); + + auto net = makeMulticlassNet2(); + auto loss = make_shared(); + auto optim = make_shared( + net->parameters(), /*lr=*/0.0003, /*decay=*/0.95); + + auto trainLoop = train::BaseTrainLoop( + net, loss, optim, /*epochs=*/10000, /*bsize=*/6); + + trainLoop.run(x, y, /*shuffle=*/false, /*verbose=*/false); + + auto pred = (*net)(x); + auto finalLoss = (*loss)(y, pred); + + auto softmax = module::Softmax(); + EXPECT_LT((*finalLoss)[0], 0.05f) + << "Network failed to overfit multiclass dataset" + << "Final prediction: " << softmax(*pred) << "\nFinal loss: " << *finalLoss; +} \ No newline at end of file diff --git a/tests/python/test_training.py b/tests/python/test_training.py new file mode 100644 index 0000000..d974a0f --- /dev/null +++ b/tests/python/test_training.py @@ -0,0 +1,123 @@ +""" 
+Robert Baumgartner, r.baumgartner-1@tudelft.nl +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "python_lib")) +print(sys.path) + +from dl_lib import Tensor +from dl_lib.nn import FfLayer, Sequential +from dl_lib.nn.activation import LeakyReLU +from dl_lib.train.loss import BCE, BceWithSigmoid, CrossEntropyWithSoftmax +from dl_lib.train.optim import SGD, RmsProp + +from dl_lib.sys import setSeed +import pytest + +setSeed(42) + +def train(net, loss_fn, optim, x, y, epochs): + for epoch in range(epochs): + ypred = net.forward(x) + loss = loss_fn(y, ypred) + + loss.backward() + optim.step() + optim.zeroGrad() + + return loss + +def make_binary_net(): + net = Sequential() + net.append(FfLayer(2, 4, True, True)) + net.append(LeakyReLU(0.01)) + net.append(FfLayer(4, 1, True, True)) + return net + +def make_multiclass_net(): + net = Sequential() + net.append(FfLayer(2, 8, True, True)) + net.append(LeakyReLU(0.01)) + net.append(FfLayer(8, 3, True, True)) + return net + +def make_xor_data(): + x = Tensor([4, 2], [0.0, 0.0, + 0.0, 1.0, + 1.0, 0.0, + 1.0, 1.0], False) + y = Tensor([4, 1], [0.0, + 1.0, + 1.0, + 0.0], False) + return x, y + +def make_multiclass_data(): + x = Tensor([6, 2], [1.0, 0.0, + 1.0, 0.1, + 0.0, 1.0, + 0.1, 1.0, + 0.5, 0.5, + 0.4, 0.6], False) + y = Tensor([6, 3], [1.0, 0.0, 0.0, + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + 0.0, 0.0, 1.0], False) + return x, y + +class TestOverfitBinary: + def test_binary_sgd_overfits(self): + x, y = make_xor_data() + net = make_binary_net() + loss_fn = BceWithSigmoid() + optim = SGD(net.parameters(), 0.05) + + final_loss = train(net, loss_fn, optim, x, y, epochs=2000) + + assert final_loss.getitem(0) < 0.05, \ + f"SGD failed to overfit XOR, loss={final_loss.getitem(0)}" + + def test_binary_rmsprop_overfits(self): + x, y = make_xor_data() + net = make_binary_net() + loss_fn = BceWithSigmoid() + optim = RmsProp(net.parameters(), 0.0001, 
0.95) + + final_loss = train(net, loss_fn, optim, x, y, epochs=5000) + + assert final_loss.getitem(0) < 0.05, \ + f"RmsProp failed to overfit XOR, loss={final_loss.getitem(0)}" + + def test_multiclass_rmsprop_overfits(self): + x, y = make_multiclass_data() + net = make_multiclass_net() + loss_fn = CrossEntropyWithSoftmax() + optim = RmsProp(net.parameters(), 0.0003, 0.95) + + final_loss = train(net, loss_fn, optim, x, y, epochs=10000) + + assert final_loss.getitem(0) < 0.05, \ + f"RmsProp failed to overfit multiclass, loss={final_loss.getitem(0)}" + + def test_loss_decreases(self): + """Loss should be strictly lower after training than before""" + x, y = make_xor_data() + net = make_binary_net() + loss_fn = BceWithSigmoid() + + optim = SGD(net.parameters(), 0.001) + initial_pred = net.forward(x) + initial_loss = loss_fn(y, initial_pred).getitem(0) + train(net, loss_fn, optim, x, y, epochs=2000) + + final_pred = net.forward(x) + final_loss = loss_fn(y, final_pred).getitem(0) + + assert final_loss < initial_loss, \ + f"Loss did not decrease: {initial_loss} -> {final_loss}" + \ No newline at end of file