Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
47b2d2e
Made addition broadcast on tensor + vector for adding bias
RobBa Mar 7, 2026
f3764ec
Started with activations, optimizers, and losses
RobBa Mar 7, 2026
581ff5b
Started implementing FF-layer, graph creation for activation functions
RobBa Mar 8, 2026
e18f74c
Upgraded to CMake 3.28 and updated python includes
RobBa Mar 9, 2026
89b2e3b
Merge branch 'dev/training_loop' of github.com:RobBa/dl_lib into dev/…
RobBa Mar 9, 2026
82edf60
Fixed issues, added unit tests
RobBa Mar 9, 2026
2608894
Naive tensor slicing to enable batch-optimization; ground work for op…
RobBa Mar 10, 2026
ba0ad22
Implemented SGD, trainer
RobBa Mar 11, 2026
b54eab4
RmsProp
RobBa Mar 13, 2026
476bd57
Defining network, layers, and activation functions as module, so that…
RobBa Mar 13, 2026
db28965
Updating module and dim type
RobBa Mar 13, 2026
26630ca
Restructuring to have cleaner and more modular Python interface. Brin…
RobBa Mar 14, 2026
c35e526
Finished implementing training loop. Needs debugging
RobBa Mar 14, 2026
a31a3b9
Renamed getitem and setitem
RobBa Mar 14, 2026
12614fe
Fixed bug, simplified FfLayer ctor and Matmul implementation
RobBa Mar 15, 2026
9075c67
Added unit tests, fixed bugs and numerical instabilities
RobBa Mar 15, 2026
f3cb32b
Give gaussian init variables to fix train loop
RobBa Mar 15, 2026
dfd98b9
Yet to fix training loop
RobBa Mar 15, 2026
4b0fe88
Fixing first train loop
RobBa Mar 16, 2026
344f1e1
Fixed the two train loops
RobBa Mar 17, 2026
982f8f5
Minor refactors
RobBa Mar 17, 2026
28cc849
Added numerically stable losses
RobBa Mar 17, 2026
e6d9ba6
Added modules, fixed bugs
RobBa Mar 21, 2026
33b22f8
Added random seed and Xavier initializer; cleanup of initializer inte…
RobBa Mar 21, 2026
9a998f1
Testing Python interface of training loops
RobBa Mar 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
build
.vscode
unit_tests_backend
*.txt
python_lib/dl_lib/_compiled
*__pycache__*
*__pycache__*
*_cache

# TODO: remove later
benchmarks
20 changes: 12 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ endif()
add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

option(DOUBLE_PRECISION "Compile with double precision at cost of speed?" OFF) # TODO: not implemented yet
# TODO: add flag for double precision?

# include python libs
if(APPLE)
Expand All @@ -40,7 +40,7 @@ if(APPLE)
message("Python_INCLUDE_DIRS:${${Python_INCLUDE_DIRS}}")

set(PYTHON_LIBRARIES ${Python_LIBRARIES})
string(COMPARE EQUAL "${PYTHON_LIBRARIES}" "" PYTHONLIBS_EMPTY)
string(COMPARE EQUAL "${Python_LIBRARIES}" "" PYTHONLIBS_EMPTY)
if(PYTHONLIBS_EMPTY)
message(FATAL_ERROR "Problem: PYTHON_LIBRARIES not found. Do you have Python installed on your machine?")
endif()
Expand All @@ -50,19 +50,18 @@ if(APPLE)
message("Failed to automatically find Python_INCLUDE_DIRS. Setting the PYTHON_INCLUDE_DIRS variable manually. If this crashes please adjust the following
path to the path where Python.h resides (the one matching the found Python instance). Paths must be consistent iff multiple Python versions on machine.")
set(PYTHON_H_PATH "/usr/local/opt/python@3.13/Frameworks/Python.framework/Versions/3.13/include/python3.13")
set(PYTHON_INCLUDE_DIRS "${PYTHON_H_PATH}")
set(PYTHON_INCLUDE_DIRS "${Python_H_PATH}")
else()
set(PYTHON_INCLUDE_DIRS ${Python_INCLUDE_DIRS})
endif()
#FindPython3()
message("Apple - Using Python:${Python_VERSION_MAJOR} - Libraries:${PYTHON_LIBRARIES} - IncludeDirs: ${PYTHON_INCLUDE_DIRS}")
message("Apple - Using Python:${Python_VERSION_MAJOR} - Libraries:${Python_LIBRARIES} - IncludeDirs: ${Python_INCLUDE_DIRS}")
else()
message("Getting PythonLibs on Linux or Windows path")
find_package(PythonLibs REQUIRED)
find_package(Python 3 REQUIRED COMPONENTS Interpreter Development)
endif()
include_directories(${PYTHON_INCLUDE_DIRS})
message("Using Python:${Python_VERSION_MAJOR} - Libraries:${PYTHON_LIBRARIES} - IncludeDirs: ${PYTHON_INCLUDE_DIRS}")

include_directories(${Python_INCLUDE_DIRS})
message("Using Python:${Python_VERSION_MAJOR} - Libraries:${Python_LIBRARIES} - IncludeDirs: ${Python_INCLUDE_DIRS}")

#set(CMAKE_MESSAGE_LOG_LEVEL WARNING)

Expand Down Expand Up @@ -108,4 +107,9 @@ option(BUILD_TESTS "Build tests" OFF)
if(BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
endif()

option(BUILD_BENCHMARKS "Build benchmarks" OFF)
if(BUILD_BENCHMARKS)
add_subdirectory(benchmarks)
endif()
6 changes: 4 additions & 2 deletions python_lib/dl_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from ._compiled._core import Tensor, Dimension, Device, Ones, Zeros, Gaussian
from ._compiled._core import Tensor, Dimension, Device

__all__ = ['Tensor', 'Device', 'Dimension']
__all__ = ['Tensor', 'Device', 'Dimension']

__version__ = "0.2.0"
5 changes: 3 additions & 2 deletions python_lib/dl_lib/nn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#from .._compiled._layers import FfLayer, ReLU
from .module import Module, Sequential
from dl_lib._compiled._nn import FfLayer
#from .._compiled._core import Tensor # re-export if needed

#__all__ = ['FfLayer', 'ReLU']
__all__ = ['Module', 'Sequential', 'FfLayer']
4 changes: 4 additions & 0 deletions python_lib/dl_lib/nn/activation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from dl_lib._compiled._nn import ReLU, LeakyReLU, Softmax
#from .._compiled._core import Tensor # re-export if needed

__all__ = ['ReLU', 'LeakyReLU', 'Softmax']
59 changes: 59 additions & 0 deletions python_lib/dl_lib/nn/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
Module base class. We use it to automatically register network
modules when defining graphs via Module.
"""

from .._compiled._nn import _Module

class Module(_Module):
    """
    Module base class. Sub-module attributes assigned in ``__init__`` are
    registered automatically, so ``parameters()`` can walk the whole
    module tree when defining graphs via Module.
    """

    def __init__(self):
        # Bypass our own __setattr__ so the registry exists before any
        # sub-module assignment triggers it. (The original assigned
        # _modules twice — once via object.__setattr__ and once via
        # self._modules — which is redundant; one assignment suffices.)
        # NOTE(review): _Module.__init__ is never called here — confirm the
        # C++ base does not require explicit initialization.
        object.__setattr__(self, "_modules", {})

    def __setattr__(self, name, value):
        """Record Module-valued attributes in the private _modules registry."""
        if isinstance(value, Module):
            self._modules[name] = value
        object.__setattr__(self, name, value)

    def parameters(self):
        """
        Return a flat list of leaf parameters of this module and all
        registered sub-modules. Used to identify trainable nodes of a graph.
        """
        params = self._own_parameters()  # calls C++ side for leaf modules
        for module in self._modules.values():
            params.extend(module.parameters())
        return params

"""
For convenience.
"""
class Sequential(Module):
def __init__(self):
super().__init__()
object.__setattr__(self, "_layers", [])

def append(self, module):
self._layers.append(module)

def forward(self, x):
for layer in self._layers:
x = layer(x)
return x

def parameters(self):
params = []
for layer in self._layers:
if hasattr(layer, 'parameters'):
result = layer.parameters()
if isinstance(result, list):
params.extend(result)
else:
params.extend(list(result)) # force conversion from BP proxy
elif hasattr(layer, 'params'):
params.extend(list(layer.params))
return params
1 change: 1 addition & 0 deletions python_lib/dl_lib/sys/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from dl_lib._compiled._sys import getDevice, setDevice, setSeed
4 changes: 4 additions & 0 deletions python_lib/dl_lib/train/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from dl_lib._compiled._train import TrainLoop
#from dl_lib._compiled._core import Tensor # re-export if needed

__all__ = ['TrainLoop']
4 changes: 4 additions & 0 deletions python_lib/dl_lib/train/loss/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from dl_lib._compiled._train import BCE, BceWithSigmoid, CrossEntropy, CrossEntropyWithSoftmax
#from dl_lib._compiled._core import Tensor # re-export if needed

__all__ = ['BCE', 'BceWithSigmoid', 'CrossEntropy', 'CrossEntropyWithSoftmax']
4 changes: 4 additions & 0 deletions python_lib/dl_lib/train/optim/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from dl_lib._compiled._train import SGD, RmsProp
#from dl_lib._compiled._core import Tensor # re-export if needed

__all__ = ['SGD', 'RmsProp']
13 changes: 7 additions & 6 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ For some examples on Python interface, see tests/python.
🚧 **Work in Progress** - Implementing additional layers and optimizations

Roadmap:
- [ ] Python Binding Unit Tests
- [ ] Additional layer types (Conv2D, LSTM, etc.)
- [x] Python Binding Unit Tests
- [ ] Additional layer types (Conv2D, Dropout, etc.)
- [ ] Optimizers and training framework
- [ ] CUDA kernels for performance-critical operations
- [ ] CUDA mode for operations
- [ ] AlexNet reference implementation
- [ ] Docker deployment example

Expand All @@ -62,11 +62,12 @@ ctest

## Required

- Compiler capable of C++20 at least (we test with gcc 12.3.0)
- Compiler capable of C++23 at least (we test with gcc 13.3.0)
- Boost Python
- Cmake > 3.24
- Cmake > 3.28
- Python 3 (we test with 3.10, but any recent 3.x release should work)
- pytest for unit tests (we use 9.0.2)
- pytest and GTest for unit tests (we use pytest=9.0.2)
- Google Benchmark for benchmarking

## Troubleshooting

Expand Down
33 changes: 27 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,38 @@
add_subdirectory(backend)
add_subdirectory(python)

target_link_libraries(_core
target_link_libraries(_core PRIVATE
${Boost_LIBRARIES}
${PYTHON_LIBRARIES}
${Python_LIBRARIES}
BackendCore)

target_include_directories(_core PRIVATE
${Python_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS})

target_link_libraries(_nn PRIVATE
${Boost_LIBRARIES}
${PYTHON_LIBRARIES}
BackendCore)

target_include_directories(_nn PRIVATE
${PYTHON_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS})

target_link_libraries(_sys PRIVATE
${Boost_LIBRARIES}
${PYTHON_LIBRARIES}
BackendCore)

target_include_directories(_sys PRIVATE
${PYTHON_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS})

#target_link_libraries(py_layers PRIVATE ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} BackendCore)
#target_include_directories(py_layers PRIVATE ${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
target_link_libraries(_train PRIVATE
${Boost_LIBRARIES}
${PYTHON_LIBRARIES}
BackendCore)

# for compiled boost lib
#target_link_libraries(hello PRIVATE Boost::filesystem)
target_include_directories(_train PRIVATE
${PYTHON_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS})
23 changes: 8 additions & 15 deletions src/backend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
#include_directories(
# "${CMAKE_CURRENT_SOURCE_DIR}/computational_graph"
# "${CMAKE_CURRENT_SOURCE_DIR}/data_modeling"
# "${CMAKE_CURRENT_SOURCE_DIR}/utility"
# "${CMAKE_CURRENT_SOURCE_DIR}/layers"
# "${CMAKE_CURRENT_SOURCE_DIR}/system"
# "${CMAKE_CURRENT_SOURCE_DIR}/training/loss_functions"
# "${CMAKE_CURRENT_SOURCE_DIR}/training/optimizers"
# )

file(GLOB_RECURSE CORE_SOURCES
computational_graph/*.cpp
data_modeling/*.cpp
#layers/*.cpp
#networks/*.cpp
#training/*.cpp
utility/*.cpp
module/*.cpp
system/*.cpp
training/*.cpp
utility/*.cpp
)

add_library(BackendCore STATIC ${CORE_SOURCES})
add_library(BackendCore SHARED ${CORE_SOURCES})

target_include_directories(BackendCore PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
)

set_target_properties(BackendCore PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${PYTHON_MODULE_DIR}" # make sure Python-modules see backend
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* @file leaky_relu_node.cpp
* @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
* @brief
* @version 0.1
* @date 2026-03-07
*
* @copyright Copyright (c) 2026
*
*/

#include "leaky_relu_node.h"

#include <utility>

using namespace std;
using namespace cgraph;

vector<shared_ptr<Tensor>> LeakyReLuNode::backward(const Tensor& upstreamGrad) {
    // Upstream gradients are plain values; they must not track gradients themselves.
    assert(!upstreamGrad.getRequiresGrad());

    auto grad = make_shared<Tensor>(upstreamGrad.getDims(), upstreamGrad.getDevice(), false);

    // d/dx LeakyReLU(x) is 1 where the forward input was positive and eps
    // (the negative slope) otherwise; apply the chain rule element-wise.
    const auto& input = parents[0];
    for (tensorSize_t i = 0; i < upstreamGrad.getSize(); ++i) {
        const ftype g = upstreamGrad[i];
        grad->set((*input)[i] > static_cast<ftype>(0) ? g : g * eps, i);
    }

    return {grad};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* @file leaky_relu_node.h
* @author Robert Baumgartner (r.baumgartner-1@tudelft.nl)
* @brief
* @version 0.1
* @date 2026-03-07
*
* @copyright Copyright (c) 2026
*
*/

#pragma once

#include "computational_graph/graph_node.h"

#include <memory>

namespace cgraph {
/**
 * @brief Computational-graph node for the LeakyReLU activation.
 *
 * Holds the input tensor as its single parent and implements the backward
 * pass: gradients pass through unchanged where the input was positive and
 * are scaled by @p eps otherwise.
 *
 * NOTE(review): "eps" here is the negative-slope coefficient (commonly
 * called alpha), not a numerical-stability epsilon — consider renaming.
 */
class LeakyReLuNode final : public GraphNode {
private:
    // Slope applied to gradients on the non-positive side of the input.
    const ftype eps;

public:
    /**
     * @param t   input tensor this node differentiates with respect to
     * @param eps negative-slope coefficient used in the backward pass
     */
    explicit LeakyReLuNode(std::shared_ptr<Tensor> t, const ftype eps)
        : GraphNode({std::move(t)}), eps{eps} {}

    /// Computes the gradient w.r.t. the parent tensor given @p upstreamGrad.
    std::vector<std::shared_ptr<Tensor>> backward(const Tensor& upstreamGrad) override;
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
#include <utility>

using namespace std;
using namespace graph;
using namespace cgraph;

vector<shared_ptr<Tensor>> ReLuNode::backward(const Tensor& upstreamGrad) {
assert(!upstreamGrad.getRequiresGrad());

constexpr ftype zero = 0.0;

auto res = make_shared<Tensor>(upstreamGrad.getDims().toVector(), upstreamGrad.getDevice(), false);
auto res = make_shared<Tensor>(upstreamGrad.getDims(), upstreamGrad.getDevice(), false);

const auto& parent = parents[0];
for(tensorSize_t i=0; i<upstreamGrad.getSize(); i++){
auto v = upstreamGrad.getItem(i);
res->setItem(v > zero ? v : zero, i);
res->set((*parent)[i] > zero ? upstreamGrad[i] : zero, i);
}
return {std::move(res)};

return {res};
}
Loading
Loading