From 3e4bc05a2ed0c4f0e6da4c4739e50d97f9d70c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Felipe=20Santos?=
 <5733+jfsantos@users.noreply.github.com>
Date: Fri, 13 Feb 2026 16:54:22 -0800
Subject: [PATCH 1/4] Add profiling instrumentation for NAM building blocks
 (#219)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add profiling instrumentation for NAM building blocks

Adds a profiling framework (NAM/profiling.h, NAM/profiling.cpp) with
NAM_PROFILE_START()/NAM_PROFILE_ADD() macros and 14 timing categories.
Supports both desktop (std::chrono) and ARM Cortex-M7 (DWT cycle counter)
backends. Profiling is compile-time gated via -DNAM_PROFILING.

Instruments wavenet _Layer::Process() and _LayerArray::ProcessInner()
with per-category timing, and adds profiling reset/print calls to the
benchmodel tool.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fixed build flags for benchmodel

* Added a command line tool to output memory usage for a given .nam file

* Bugfix - checking that condition_dsp is not null in the JSON (#220)

* [BUGFIX, BREAKING] Make activation base class abstract, fix PReLU implementation (#223)

* Make activation apply method pure virtual instead of no-op default

* Fix bugs

* Refactor to throw std::invalid_argument in debug mode, add tests

* Add TONE3000 support note in README.md (#224)

* Replace hardcoded profiling struct with dynamic registry

  The Timings struct hardcoded 14 named fields, requiring manual updates
  to reset(), total(), print_results(), and every call site whenever a
  category was added or removed. Replace with a flat-array registry where
  types are registered at file scope via register_type(), returning an
  integer index for O(1) accumulation in the hot path.

  Also adds NAM_PROFILE_RESTART() macro to replace a raw #ifdef block
  in wavenet.cpp.

---------

Co-authored-by: João Felipe Santos <santosjf@pm.me>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Steven Atkinson <steven@atkinson.mn>
---
 NAM/conv1d.cpp         |   4 +
 NAM/dsp.cpp            |   4 +
 NAM/film.h             |   4 +
 NAM/profiling.cpp      |  88 ++++++
 NAM/profiling.h        |  85 ++++++
 NAM/wavenet.cpp        |  31 +++
 tools/CMakeLists.txt   |   6 +-
 tools/benchmodel.cpp   |   7 +
 tools/memory_usage.cpp | 611 +++++++++++++++++++++++++++++++++++++++++
 9 files changed, 839 insertions(+), 1 deletion(-)
 create mode 100644 NAM/profiling.cpp
 create mode 100644 NAM/profiling.h
 create mode 100644 tools/memory_usage.cpp
diff --git a/NAM/conv1d.cpp b/NAM/conv1d.cpp
index 9bbbc020..d440f0c1 100644
--- a/NAM/conv1d.cpp
+++ b/NAM/conv1d.cpp
@@ -1,4 +1,5 @@
 #include "conv1d.h"
+#include "profiling.h"
 #include <stdexcept>
 
 namespace nam
@@ -143,6 +144,9 @@ void Conv1D::SetMaxBufferSize(const int maxBufferSize)
 
 void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
 {
+  // Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp)
+  // to avoid double-counting when Conv1D is called from within profiled blocks.
+
   // Write input to ring buffer
   _input_buffer.Write(input, num_frames);
 
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 05dab09d..b644af31 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -8,6 +8,7 @@
 #include <unordered_set>
 
 #include "dsp.h"
+#include "profiling.h"
 #include "registry.h"
 
 #define tanh_impl_ std::tanh
@@ -443,6 +444,9 @@ Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int nu
 
 void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, const int num_frames)
 {
+  // Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp)
+  // to provide meaningful categories (input_mixin, layer1x1, head1x1, rechannel)
+  // rather than generic conv1x1.
   assert(num_frames <= _output.cols());
 
   if (this->_is_depthwise)
diff --git a/NAM/film.h b/NAM/film.h
index f0f86fb4..eeb750a4 100644
--- a/NAM/film.h
+++ b/NAM/film.h
@@ -81,9 +81,13 @@ class FiLM
     assert(num_frames <= condition.cols());
     assert(num_frames <= _output.cols());
 
+    // Conv1x1 to compute scale/shift from condition
     _cond_to_scale_shift.process_(condition, num_frames);
     const auto& scale_shift = _cond_to_scale_shift.GetOutput();
 
+    // Note: FiLM time is included in the caller's profiling category (e.g., conv1d, input_mixin)
+    // rather than tracked separately, to avoid double-counting.
+
     const auto scale = scale_shift.topRows(get_input_dim()).leftCols(num_frames);
     if (_do_shift)
     {
diff --git a/NAM/profiling.cpp b/NAM/profiling.cpp
new file mode 100644
index 00000000..885872ee
--- /dev/null
+++ b/NAM/profiling.cpp
@@ -0,0 +1,88 @@
+#include "profiling.h"
+
+#ifdef NAM_PROFILING
+
+#if defined(__ARM_ARCH_7EM__) || defined(ARM_MATH_CM7)
+// ARM Cortex-M7: Use DWT cycle counter for precise timing
+#include "stm32h7xx.h"
+
+namespace nam {
+namespace profiling {
+
+ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {};
+int g_num_entries = 0;
+
+// CPU frequency in MHz (Daisy runs at 480 MHz)
+static constexpr uint32_t CPU_FREQ_MHZ = 480;
+
+uint32_t get_time_us() {
+  // Cycle count read from DWT->CYCCNT, divided by CPU_FREQ_MHZ (cycles per microsecond).
+  // NOTE(review): assuming a 32-bit CYCCNT, the result wraps near 2^32/480 us (~8.9 s), not at 2^32, so a delta spanning a wrap is wrong — confirm.
+  return DWT->CYCCNT / CPU_FREQ_MHZ;
+}
+
+} // namespace profiling
+} // namespace nam
+
+#else
+// Non-ARM: Use std::chrono for timing (for testing on desktop)
+#include <chrono>
+
+namespace nam {
+namespace profiling {
+
+ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {};
+int g_num_entries = 0;
+
+uint32_t get_time_us() {
+  using clock = std::chrono::high_resolution_clock;
+  static const clock::time_point origin = clock::now(); // captured on the first call only
+  const auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(clock::now() - origin);
+  return static_cast<uint32_t>(elapsed_us.count()); // microseconds since the first call, cast to 32 bits
+}
+
+} // namespace profiling
+} // namespace nam
+
+#endif // ARM check
+
+namespace nam {
+namespace profiling {
+
+int register_type(const char* name) {
+  // Table full: share the last slot so neither this call nor later NAM_PROFILE_ADD()s index past g_entries.
+  if (g_num_entries >= MAX_PROFILING_TYPES) return MAX_PROFILING_TYPES - 1;
+  g_entries[g_num_entries] = {name, 0}; // stores the pointer (not a copy) with a zeroed counter
+  return g_num_entries++;
+}
+
+void reset() { // zero every registered counter; names and indices are kept
+  for (ProfilingEntry* entry = g_entries; entry != g_entries + g_num_entries; ++entry)
+    entry->accumulated_us = 0;
+}
+
+// Print the per-category time table to stdout; categories with zero time are omitted.
+void print_results() {
+  uint64_t total = 0; // 64-bit: the sum of several uint32_t counters can exceed 2^32 us
+  for (int i = 0; i < g_num_entries; i++)
+    total += g_entries[i].accumulated_us;
+
+  printf("\nProfiling breakdown:\n");
+  printf("%-12s %8s %6s\n", "Category", "Time(ms)", "%");
+  printf("%-12s %8s %6s\n", "--------", "--------", "----");
+
+  for (int i = 0; i < g_num_entries; i++) {
+    const uint32_t us = g_entries[i].accumulated_us;
+    if (us == 0) continue;
+    // Widen before multiplying: us * 100 overflows uint32_t once us exceeds ~42.9 s.
+    const unsigned long pct = (unsigned long)((uint64_t)us * 100 / total); // total >= us > 0 here
+    printf("%-12s %8.1f %5lu%%\n", g_entries[i].name, us / 1000.0f, pct);
+  }
+  printf("%-12s %8s %6s\n", "--------", "--------", "----");
+  printf("%-12s %8.1f %5s\n", "Total", total / 1000.0f, "100%");
+}
+
+} // namespace profiling
+} // namespace nam
+
+#endif // NAM_PROFILING
diff --git a/NAM/profiling.h b/NAM/profiling.h
new file mode 100644
index 00000000..4db570b9
--- /dev/null
+++ b/NAM/profiling.h
@@ -0,0 +1,85 @@
+#pragma once
+
+// Dynamic profiling registry for NAM building blocks
+// Enable with -DNAM_PROFILING
+//
+// Usage:
+//   1. Register profiling types at file scope (static init):
+//        static int PROF_FOO = nam::profiling::register_type("Foo");
+//   2. Call nam::profiling::reset() before benchmark
+//   3. In hot path:
+//        NAM_PROFILE_START();
+//        // ... code ...
+//        NAM_PROFILE_ADD(PROF_FOO);
+//   4. Call nam::profiling::print_results() to display breakdown
+
+#ifdef NAM_PROFILING
+
+#include <cstdint>
+#include <cstdio>
+
+namespace nam {
+namespace profiling {
+
+constexpr int MAX_PROFILING_TYPES = 32;
+
+struct ProfilingEntry {
+  const char* name;        // label given to register_type(); the pointer is stored, not copied
+  uint32_t accumulated_us; // microseconds added by the NAM_PROFILE_ADD* macros; zeroed by reset()
+};
+
+extern ProfilingEntry g_entries[MAX_PROFILING_TYPES];
+extern int g_num_entries;
+
+// Register a named profiling type. Returns index for fast accumulation.
+// Called at static-init time or during setup, NOT in the hot path.
+int register_type(const char* name);
+
+// Get current time in microseconds (platform-specific)
+uint32_t get_time_us();
+
+// Reset all profiling counters
+void reset();
+
+// Print profiling results to stdout
+void print_results();
+
+// Helper macros for timing sections. NAM_PROFILE_START() declares a local `_prof_start`
+// that the other macros read/update, so use it first and only once per scope. Usage:
+//   NAM_PROFILE_START();
+//   // ... code to profile ...
+//   NAM_PROFILE_ADD(PROF_FOO);  // Adds elapsed time to entry, resets timer
+
+#define NAM_PROFILE_START() uint32_t _prof_start = nam::profiling::get_time_us()
+#define NAM_PROFILE_ADD(idx) do { \
+  uint32_t _prof_now = nam::profiling::get_time_us(); \
+  nam::profiling::g_entries[idx].accumulated_us += (_prof_now - _prof_start); \
+  _prof_start = _prof_now; \
+} while(0)
+
+// Variant that adds the elapsed time but leaves `_prof_start` unchanged (for one-shot measurements)
+#define NAM_PROFILE_ADD_NORESTART(idx) \
+  nam::profiling::g_entries[idx].accumulated_us += (nam::profiling::get_time_us() - _prof_start)
+
+// Re-read the clock into `_prof_start` without recording anything (for re-syncing mid-function)
+#define NAM_PROFILE_RESTART() _prof_start = nam::profiling::get_time_us()
+
+} // namespace profiling
+} // namespace nam
+
+#else // NAM_PROFILING not defined
+
+// No-op macros when profiling is disabled
+#define NAM_PROFILE_START() ((void)0)
+#define NAM_PROFILE_ADD(idx) ((void)0)
+#define NAM_PROFILE_ADD_NORESTART(idx) ((void)0)
+#define NAM_PROFILE_RESTART() ((void)0)
+
+namespace nam {
+namespace profiling {
+  inline void reset() {}
+  inline void print_results() {}
+} // namespace profiling
+} // namespace nam
+
+#endif // NAM_PROFILING
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index 6eb74a3b..4a1b5217 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -6,9 +6,20 @@
 #include <Eigen/Dense>
 
 #include "get_dsp.h"
+#include "profiling.h"
 #include "registry.h"
 #include "wavenet.h"
 
+#ifdef NAM_PROFILING
+static int PROF_CONV1D      = nam::profiling::register_type("Conv1D");
+static int PROF_INPUT_MIXIN = nam::profiling::register_type("InputMixin");
+static int PROF_LAYER1X1    = nam::profiling::register_type("Layer1x1");
+static int PROF_HEAD1X1     = nam::profiling::register_type("Head1x1");
+static int PROF_RECHANNEL   = nam::profiling::register_type("Rechannel");
+static int PROF_ACTIVATION  = nam::profiling::register_type("Activation");
+static int PROF_COPIES      = nam::profiling::register_type("Copies");
+#endif
+
 // Layer ======================================================================
 
 void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize)
@@ -89,6 +100,8 @@ void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)
 
 void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames)
 {
+  NAM_PROFILE_START();
+
   const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels
 
   // Step 1: input convolutions
@@ -107,6 +120,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     Eigen::MatrixXf& conv_output = this->_conv.GetOutput();
     this->_conv_post_film->Process_(conv_output, condition, num_frames);
   }
+  NAM_PROFILE_ADD(PROF_CONV1D);
 
   if (this->_input_mixin_pre_film)
   {
@@ -123,8 +137,12 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     Eigen::MatrixXf& input_mixin_output = this->_input_mixin.GetOutput();
     this->_input_mixin_post_film->Process_(input_mixin_output, condition, num_frames);
   }
+  NAM_PROFILE_ADD(PROF_INPUT_MIXIN);
+
   this->_z.leftCols(num_frames).noalias() =
     _conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames);
+  NAM_PROFILE_ADD(PROF_COPIES);
+
   if (this->_activation_pre_film)
   {
     this->_activation_pre_film->Process_(this->_z, condition, num_frames);
@@ -139,6 +157,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
   if (this->_gating_mode == GatingMode::NONE)
   {
     this->_activation->apply(this->_z.leftCols(num_frames));
+    NAM_PROFILE_ADD(PROF_ACTIVATION);
     if (this->_activation_post_film)
     {
       this->_activation_post_film->Process_(this->_z, condition, num_frames);
@@ -146,6 +165,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     if (this->_layer1x1)
     {
       this->_layer1x1->process_(this->_z, num_frames);
+      NAM_PROFILE_ADD(PROF_LAYER1X1);
     }
   }
   else if (this->_gating_mode == GatingMode::GATED)
@@ -155,6 +175,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     auto input_block = this->_z.leftCols(num_frames);
     auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames);
     this->_gating_activation->apply(input_block, output_block);
+    NAM_PROFILE_ADD(PROF_ACTIVATION);
     if (this->_activation_post_film)
     {
       // Use Process() for blocks and copy result back
@@ -165,6 +186,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     if (this->_layer1x1)
     {
       this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames);
+      NAM_PROFILE_ADD(PROF_LAYER1X1);
     }
   }
   else if (this->_gating_mode == GatingMode::BLENDED)
@@ -174,6 +196,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     auto input_block = this->_z.leftCols(num_frames);
     auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames);
     this->_blending_activation->apply(input_block, output_block);
+    NAM_PROFILE_ADD(PROF_ACTIVATION);
     if (this->_activation_post_film)
     {
       // Use Process() for blocks and copy result back
@@ -184,6 +207,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     if (this->_layer1x1)
     {
       this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames);
+      NAM_PROFILE_ADD(PROF_LAYER1X1);
       if (this->_layer1x1_post_film)
       {
         Eigen::MatrixXf& layer1x1_output = this->_layer1x1->GetOutput();
@@ -207,6 +231,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
       Eigen::MatrixXf& head1x1_output = this->_head1x1->GetOutput();
       this->_head1x1_post_film->Process_(head1x1_output, condition, num_frames);
     }
+    NAM_PROFILE_ADD(PROF_HEAD1X1);
     this->_output_head.leftCols(num_frames).noalias() = this->_head1x1->GetOutput().leftCols(num_frames);
   }
   else // No head 1x1
@@ -230,6 +255,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     // If layer1x1 is inactive, residual connection is just the input (identity)
     this->_output_next_layer.leftCols(num_frames).noalias() = input.leftCols(num_frames);
   }
+  NAM_PROFILE_ADD(PROF_COPIES);
 }
 
 // LayerArray =================================================================
@@ -298,9 +324,12 @@ void nam::wavenet::_LayerArray::Process(const Eigen::MatrixXf& layer_inputs, con
 void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition,
                                              const int num_frames)
 {
+  NAM_PROFILE_START();
+
   // Process rechannel and get output
   this->_rechannel.process_(layer_inputs, num_frames);
   Eigen::MatrixXf& rechannel_output = _rechannel.GetOutput();
+  NAM_PROFILE_ADD(PROF_RECHANNEL);
 
   // Process layers
   for (size_t i = 0; i < this->_layers.size(); i++)
@@ -329,7 +358,9 @@ void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs
     this->_layers[last_layer].GetOutputNextLayer().leftCols(num_frames);
 
   // Process head rechannel
+  NAM_PROFILE_RESTART();
   _head_rechannel.process_(this->_head_inputs, num_frames);
+  NAM_PROFILE_ADD(PROF_RECHANNEL);
 }
 
 
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 8118e085..8f02f20e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -12,10 +12,13 @@ include_directories(tools ${NAM_DEPS_PATH}/nlohmann)
 
 add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES})
 add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES})
+add_executable(memory_usage memory_usage.cpp)
 add_executable(run_tests run_tests.cpp test/allocation_tracking.cpp ${NAM_SOURCES})
 # Compile run_tests without optimizations to ensure allocation tracking works correctly
 # Also ensure assertions are enabled (NDEBUG is not defined) so tests actually run
 set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0")
+# Benchmodel should be built with NAM_PROFILING set
+target_compile_definitions(benchmodel PRIVATE NAM_PROFILING)
 # Ensure assertions are enabled for run_tests by removing NDEBUG if it was set
 # Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG
 # We use a compile option to undefine it, which works on GCC, Clang, and MSVC
@@ -32,6 +35,7 @@ endif()
 source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})
 
 target_compile_features(${TOOLS} PUBLIC cxx_std_20)
+target_compile_features(memory_usage PUBLIC cxx_std_20)
 
 set_target_properties(${TOOLS}
 	PROPERTIES
@@ -61,4 +65,4 @@ endif()
 # /Users/steve/src/NeuralAmpModelerCore/Dependencies/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h
 # Don't let this break my build on debug:
 set_source_files_properties(../NAM/dsp.cpp PROPERTIES COMPILE_FLAGS "-Wno-error")
-set_source_files_properties(../NAM/conv1d.cpp PROPERTIES COMPILE_FLAGS "-Wno-error")
\ No newline at end of file
+set_source_files_properties(../NAM/conv1d.cpp PROPERTIES COMPILE_FLAGS "-Wno-error")
diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp
index 39c14b0e..42556f59 100644
--- a/tools/benchmodel.cpp
+++ b/tools/benchmodel.cpp
@@ -4,6 +4,7 @@
 
 #include "NAM/dsp.h"
 #include "NAM/get_dsp.h"
+#include "NAM/profiling.h"
 
 using std::chrono::duration;
 using std::chrono::duration_cast;
@@ -62,6 +63,9 @@ int main(int argc, char* argv[])
       outputPtrs[ch] = outputBuffers[ch].data();
     }
 
+    // Reset profiling counters before benchmark
+    nam::profiling::reset();
+
     std::cout << "Running benchmark\n";
     auto t1 = high_resolution_clock::now();
     for (size_t i = 0; i < numBuffers; i++)
@@ -80,6 +84,9 @@ int main(int argc, char* argv[])
 
     std::cout << ms_int.count() << "ms\n";
     std::cout << ms_double.count() << "ms\n";
+
+    // Print profiling breakdown if enabled
+    nam::profiling::print_results();
   }
   else
   {
diff --git a/tools/memory_usage.cpp b/tools/memory_usage.cpp
new file mode 100644
index 00000000..853ca8fe
--- /dev/null
+++ b/tools/memory_usage.cpp
@@ -0,0 +1,611 @@
+// memory_usage.cpp — Report total memory required to host a NAM model at runtime.
+//
+// Usage: memory_usage <model_path> [--buffer-size N]
+//
+// Parses the .nam JSON config and computes weight memory (learned parameters stored
+// in Eigen matrices/vectors) and buffer memory (intermediate computation/state that
+// depends on maxBufferSize) without instantiating the model.
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "json.hpp"
+
+using json = nlohmann::json;
+
+static constexpr int DEFAULT_BUFFER_SIZE = 2048;
+static constexpr long INPUT_BUFFER_SAFETY_FACTOR = 32;
+
+// ─── Result accumulator ─────────────────────────────────────────────────────
+
+struct MemoryResult
+{
+  size_t weight_bytes = 0; // learned parameters, in bytes
+  size_t buffer_bytes = 0; // intermediate / state buffers, in bytes
+  // Both helpers take a count of floats and accumulate it as bytes.
+  void add_weights(size_t floats) { weight_bytes = weight_bytes + sizeof(float) * floats; }
+  void add_buffers(size_t floats) { buffer_bytes = buffer_bytes + sizeof(float) * floats; }
+
+  MemoryResult& operator+=(const MemoryResult& other)
+  {
+    this->weight_bytes += other.weight_bytes;
+    this->buffer_bytes += other.buffer_bytes;
+    return *this;
+  }
+};
+
+// ─── Conv1x1 ────────────────────────────────────────────────────────────────
+
+// Conv1x1 stores either a full (out_channels x in_channels) matrix (possibly
+// block-diagonal when grouped), or a depthwise weight vector when groups ==
+// in_channels == out_channels.
+static MemoryResult conv1x1_memory(int in_ch, int out_ch, bool bias, int groups, int M)
+{
+  // Weights: a depthwise layer keeps one value per channel (_depthwise_weight(in_ch));
+  // any other layout keeps the full _weight(out_ch, in_ch) matrix.
+  const bool is_depthwise = (in_ch == out_ch) && (groups == in_ch);
+  const size_t weight_floats = is_depthwise ? (size_t)in_ch : (size_t)out_ch * in_ch;
+  const size_t bias_floats = bias ? (size_t)out_ch : 0; // _bias(out_ch)
+
+  MemoryResult total;
+  total.add_weights(weight_floats + bias_floats);
+  total.add_buffers((size_t)out_ch * M); // _output(out_ch, M)
+  return total;
+}
+
+// ─── Conv1D ─────────────────────────────────────────────────────────────────
+
+// Conv1D stores kernel_size weight matrices (each out_ch x in_ch) or depthwise
+// vectors, plus a bias vector, a ring buffer, and an output buffer.
+static MemoryResult conv1d_memory(int in_ch, int out_ch, int kernel_size, bool bias, int dilation, int groups, int M)
+{
+  MemoryResult total;
+
+  // Weights: one tap per kernel position. A depthwise tap is a per-channel vector
+  // (_depthwise_weight[k](in_ch)); otherwise it is a matrix (_weight[k](out_ch, in_ch)).
+  const bool is_depthwise = (in_ch == out_ch) && (groups == in_ch);
+  const size_t floats_per_tap = is_depthwise ? (size_t)in_ch : (size_t)out_ch * in_ch;
+  total.add_weights((size_t)kernel_size * floats_per_tap);
+  if (bias)
+    total.add_weights(out_ch); // _bias(out_ch)
+
+  // Ring buffer (_input_buffer._storage): in_ch rows by 2 * max_lookback + M columns,
+  // where max_lookback = (kernel_size - 1) * dilation, or 0 for a non-positive kernel size.
+  long max_lookback = 0;
+  if (kernel_size > 0)
+    max_lookback = (long)(kernel_size - 1) * dilation;
+  total.add_buffers((size_t)in_ch * (2 * max_lookback + M));
+
+  total.add_buffers((size_t)out_ch * M); // _output(out_ch, M)
+  return total;
+}
+
+// ─── FiLM ───────────────────────────────────────────────────────────────────
+
+struct FiLMParams
+{
+  bool active = false; // film_memory() contributes nothing when this is false
+  bool shift = true;   // true: the conditioning projection is sized for scale and shift (2 * input_dim rows)
+  int groups = 1;      // groups value forwarded to the conditioning Conv1x1 estimate
+};
+
+static MemoryResult film_memory(int condition_dim, int input_dim, const FiLMParams& fp, int M)
+{
+  MemoryResult total; // stays zero when the FiLM is inactive
+  if (fp.active)
+  {
+    // _cond_to_scale_shift: Conv1x1 with bias emitting scale rows, plus shift rows if enabled
+    const int scale_shift_rows = fp.shift ? input_dim * 2 : input_dim;
+    total = conv1x1_memory(condition_dim, scale_shift_rows, true, fp.groups, M);
+    total.add_buffers((size_t)input_dim * M); // _output(input_dim, M)
+  }
+  return total;
+}
+
+// ─── BatchNorm ──────────────────────────────────────────────────────────────
+
+static MemoryResult batchnorm_memory(int dim)
+{
+  // At runtime only scale(dim) and loc(dim) are stored; running_mean, running_var, weight,
+  // bias and eps are consumed from the weights array to derive those two vectors.
+  MemoryResult total;
+  total.add_weights((size_t)dim + (size_t)dim); // scale + loc
+  return total;
+}
+
+// ─── LSTM ───────────────────────────────────────────────────────────────────
+
+static MemoryResult lstm_memory(const json& config)
+{
+  // Sums per-layer LSTM cell storage plus the linear head; "in_channels" enters no term, so it is not read.
+  MemoryResult r;
+  int num_layers = config["num_layers"];
+  int input_size = config["input_size"];
+  int hidden_size = config["hidden_size"];
+  int out_channels = config.value("out_channels", 1);
+
+  for (int i = 0; i < num_layers; i++)
+  {
+    int cell_input = (i == 0) ? input_size : hidden_size;
+    // _w(4*H, I+H)
+    r.add_weights((size_t)4 * hidden_size * (cell_input + hidden_size));
+    // _b(4*H)
+    r.add_weights(4 * (size_t)hidden_size);
+    // _xh(I+H) — stores initial hidden state in the hidden portion
+    r.add_weights((size_t)(cell_input + hidden_size));
+    // _c(H) — initial cell state
+    r.add_weights((size_t)hidden_size);
+
+    // Buffers: _ifgo(4*H)
+    r.add_buffers(4 * (size_t)hidden_size);
+    // Note: _xh and _c are also modified during inference but they are
+    // loaded from weights (initial state), so counted as weights above.
+  }
+
+  // _head_weight(out_channels, hidden_size)
+  r.add_weights((size_t)out_channels * hidden_size);
+  // _head_bias(out_channels)
+  r.add_weights(out_channels);
+
+  // Top-level buffers: _input(input_size), _output(out_channels)
+  r.add_buffers(input_size);
+  r.add_buffers(out_channels);
+
+  return r;
+}
+
+// ─── Linear ─────────────────────────────────────────────────────────────────
+
+static MemoryResult linear_memory(const json& config)
+{
+  // Weight vector, optional scalar bias and the input history buffers; "out_channels" enters no term, so it is not read.
+  MemoryResult r;
+  int receptive_field = config["receptive_field"];
+  bool bias = config["bias"];
+  int in_channels = config.value("in_channels", 1);
+
+  // _weight(receptive_field)
+  r.add_weights(receptive_field);
+  // _bias (scalar float)
+  if (bias)
+    r.add_weights(1);
+
+  // Buffer base: _input_buffers = in_channels vectors of (32 * receptive_field)
+  r.add_buffers((size_t)in_channels * INPUT_BUFFER_SAFETY_FACTOR * receptive_field);
+  // _output_buffers: resized per-call, not pre-allocated to a fixed size
+  // (depends on num_frames, not maxBufferSize)
+
+  return r;
+}
+
+// ─── ConvNet ────────────────────────────────────────────────────────────────
+
+static MemoryResult convnet_memory(const json& config, int M)
+{
+  MemoryResult r;
+  int channels = config["channels"];
+  std::vector<int> dilations = config["dilations"];
+  bool batchnorm = config["batchnorm"];
+  int groups = config.value("groups", 1);
+  int in_channels = config.value("in_channels", 1);
+  int out_channels = config.value("out_channels", 1);
+
+  // Largest dilation via a plain loop: an empty list yields 0 instead of dereferencing end(), and <algorithm> is not needed.
+  int max_dilation = dilations.empty() ? 0 : dilations.front();
+  for (int d : dilations)
+    max_dilation = (d > max_dilation) ? d : max_dilation;
+
+  // Buffer base class: _input_buffers = in_channels * (32 * max_dilation)
+  int receptive_field = max_dilation; // passed to Buffer as receptive_field
+  r.add_buffers((size_t)in_channels * INPUT_BUFFER_SAFETY_FACTOR * receptive_field);
+
+  // ConvNet blocks
+  for (size_t i = 0; i < dilations.size(); i++)
+  {
+    int block_in = (i == 0) ? in_channels : channels;
+    int block_out = channels;
+    // Conv1D with kernel_size=2, bias=!batchnorm
+    r += conv1d_memory(block_in, block_out, 2, !batchnorm, dilations[i], groups, M);
+    if (batchnorm) // optional batchnorm
+      r += batchnorm_memory(block_out);
+    // _output(out_channels, M) per block
+    r.add_buffers((size_t)block_out * M);
+  }
+
+  // _block_vals: 1 entry of (channels, buffer_size); buffer_size = input_buffers[0].size() = 32 * receptive_field
+  long buffer_size = INPUT_BUFFER_SAFETY_FACTOR * receptive_field;
+  r.add_buffers((size_t)channels * buffer_size);
+
+  // _head: weight(out_channels, channels) + bias(out_channels)
+  r.add_weights((size_t)out_channels * channels);
+  r.add_weights(out_channels);
+
+  // _head_output is resized per-call, not a fixed pre-allocation
+  return r;
+}
+
+// ─── WaveNet helpers ────────────────────────────────────────────────────────
+
+static FiLMParams parse_film_params(const json& layer_config, const std::string& key)
+{
+  FiLMParams params; // defaults describe an inactive FiLM
+  const auto entry = layer_config.find(key);
+  if (entry == layer_config.end() || *entry == false)
+    return params; // key absent or explicitly `false`
+  params.active = entry->value("active", true);
+  params.shift = entry->value("shift", true);
+  params.groups = entry->value("groups", 1);
+  return params;
+}
+
+enum class GatingMode
+{
+  NONE,   // default; also the result for any unrecognized mode string
+  GATED,  // parsed from "gated"; the layer's conv output is sized at 2 * bottleneck
+  BLENDED // parsed from "blended"; sized like GATED
+};
+
+static std::vector<GatingMode> parse_gating_modes(const json& layer_config, size_t num_layers)
+{
+  // Resolves the gating modes from "gating_mode" (one string, or an array of strings)
+  // or, when that key is absent, from the boolean "gated" key.
+
+  // Anything other than "gated" / "blended" maps to NONE.
+  const auto to_mode = [](const std::string& text) {
+    if (text == "blended")
+      return GatingMode::BLENDED;
+    return (text == "gated") ? GatingMode::GATED : GatingMode::NONE;
+  };
+
+  const auto mode_entry = layer_config.find("gating_mode");
+  if (mode_entry != layer_config.end())
+  {
+    if (!mode_entry->is_array())
+    {
+      // A single string is repeated for all num_layers layers.
+      return std::vector<GatingMode>(num_layers, to_mode(mode_entry->get<std::string>()));
+    }
+
+    // An array is converted element by element; its length is taken as given.
+    std::vector<GatingMode> per_layer;
+    for (const auto& item : *mode_entry)
+      per_layer.push_back(to_mode(item.get<std::string>()));
+    return per_layer;
+  }
+
+  // No "gating_mode": a true "gated" flag gates every layer; false or missing gates none.
+  const auto gated_entry = layer_config.find("gated");
+  bool gated = false;
+  if (gated_entry != layer_config.end())
+    gated = gated_entry->get<bool>();
+
+  return std::vector<GatingMode>(num_layers, gated ? GatingMode::GATED : GatingMode::NONE);
+}
+
+// WaveNet _Layer memory: dilated conv, input mixin, optional 1x1 projections, working buffers and FiLM modules, for buffer size M
+static MemoryResult wavenet_layer_memory(int condition_size, int channels, int bottleneck, int kernel_size, int dilation,
+                                         GatingMode gating_mode, int groups_input, int groups_input_mixin,
+                                         bool layer1x1_active, int layer1x1_groups, bool head1x1_active,
+                                         int head1x1_out_channels, int head1x1_groups, const FiLMParams& conv_pre_film,
+                                         const FiLMParams& conv_post_film, const FiLMParams& input_mixin_pre_film,
+                                         const FiLMParams& input_mixin_post_film,
+                                         const FiLMParams& activation_pre_film,
+                                         const FiLMParams& activation_post_film,
+                                         const FiLMParams& layer1x1_post_film, const FiLMParams& head1x1_post_film,
+                                         int M)
+{
+  MemoryResult r;
+  bool gated = (gating_mode != GatingMode::NONE); // true for both GATED and BLENDED
+  int conv_out = gated ? 2 * bottleneck : bottleneck; // conv and mixin output rows double when gated
+
+  // _conv: Conv1D(channels -> conv_out, kernel_size, bias=true, dilation, groups_input)
+  r += conv1d_memory(channels, conv_out, kernel_size, true, dilation, groups_input, M);
+
+  // _input_mixin: Conv1x1(condition_size -> conv_out, bias=false, groups_input_mixin)
+  r += conv1x1_memory(condition_size, conv_out, false, groups_input_mixin, M);
+
+  // _layer1x1 (optional): Conv1x1(bottleneck -> channels, bias=true, layer1x1_groups)
+  if (layer1x1_active)
+    r += conv1x1_memory(bottleneck, channels, true, layer1x1_groups, M);
+
+  // _head1x1 (optional): Conv1x1(bottleneck -> head1x1_out_channels, bias=true, head1x1_groups)
+  if (head1x1_active)
+    r += conv1x1_memory(bottleneck, head1x1_out_channels, true, head1x1_groups, M);
+
+  // Buffers: _z(conv_out, M)
+  r.add_buffers((size_t)conv_out * M);
+  // _output_next_layer(channels, M)
+  r.add_buffers((size_t)channels * M);
+  // _output_head: if head1x1 active -> (head1x1_out_channels, M), else (bottleneck, M)
+  int head_out = head1x1_active ? head1x1_out_channels : bottleneck;
+  r.add_buffers((size_t)head_out * M);
+
+  // FiLM modules (up to 8); each adds nothing unless its params are active, and the two post-1x1 ones also require that projection
+  r += film_memory(condition_size, channels, conv_pre_film, M);
+  r += film_memory(condition_size, conv_out, conv_post_film, M);
+  r += film_memory(condition_size, condition_size, input_mixin_pre_film, M);
+  r += film_memory(condition_size, conv_out, input_mixin_post_film, M);
+  r += film_memory(condition_size, conv_out, activation_pre_film, M);
+  r += film_memory(condition_size, bottleneck, activation_post_film, M);
+  if (layer1x1_active)
+    r += film_memory(condition_size, channels, layer1x1_post_film, M);
+  if (head1x1_active)
+    r += film_memory(condition_size, head1x1_out_channels, head1x1_post_film, M);
+
+  return r;
+}
+
+// Memory estimate for one WaveNet _LayerArray built from layer_config; M is the buffer size forwarded by the caller.
+static MemoryResult wavenet_layer_array_memory(const json& layer_config, int M)
+{
+  MemoryResult r;
+  int input_size = layer_config["input_size"];
+  int condition_size = layer_config["condition_size"];
+  int head_size = layer_config["head_size"];
+  int channels = layer_config["channels"];
+  int bottleneck = layer_config.value("bottleneck", channels);
+  int kernel_size = layer_config["kernel_size"];
+  std::vector<int> dilations = layer_config["dilations"];
+  size_t num_layers = dilations.size();
+  bool head_bias = layer_config["head_bias"];
+
+  int groups_input = layer_config.value("groups_input", 1);
+  int groups_input_mixin = layer_config.value("groups_input_mixin", 1);
+
+  // layer1x1 params: active with 1 group unless the config carries a "layer1x1" object
+  bool layer1x1_active = true;
+  int layer1x1_groups = 1;
+  if (layer_config.find("layer1x1") != layer_config.end())
+  {
+    layer1x1_active = layer_config["layer1x1"]["active"];
+    layer1x1_groups = layer_config["layer1x1"]["groups"];
+  }
+
+  // head1x1 params: inactive unless the config carries a "head1x1" object (out_channels = channels, 1 group)
+  bool head1x1_active = false;
+  int head1x1_out_channels = channels;
+  int head1x1_groups = 1;
+  if (layer_config.find("head1x1") != layer_config.end())
+  {
+    head1x1_active = layer_config["head1x1"]["active"];
+    head1x1_out_channels = layer_config["head1x1"]["out_channels"];
+    head1x1_groups = layer_config["head1x1"]["groups"];
+  }
+
+  // Gating modes: indexed per layer in the loop below (presumably one entry per layer - confirm in parse_gating_modes)
+  std::vector<GatingMode> gating_modes = parse_gating_modes(layer_config, num_layers);
+
+  // FiLM params: one FiLMParams per FiLM site, each read by parse_film_params from the named config key
+  FiLMParams conv_pre = parse_film_params(layer_config, "conv_pre_film");
+  FiLMParams conv_post = parse_film_params(layer_config, "conv_post_film");
+  FiLMParams input_mixin_pre = parse_film_params(layer_config, "input_mixin_pre_film");
+  FiLMParams input_mixin_post = parse_film_params(layer_config, "input_mixin_post_film");
+  FiLMParams activation_pre = parse_film_params(layer_config, "activation_pre_film");
+  FiLMParams activation_post = parse_film_params(layer_config, "activation_post_film");
+  FiLMParams layer1x1_post = parse_film_params(layer_config, "layer1x1_post_film");
+  FiLMParams head1x1_post = parse_film_params(layer_config, "head1x1_post_film");
+
+  // _rechannel: Conv1x1(input_size -> channels, bias=false, groups=1)
+  r += conv1x1_memory(input_size, channels, false, 1, M);
+
+  // Per-layer: all layers share the settings above; only the dilation and the gating mode vary with i
+  for (size_t i = 0; i < num_layers; i++)
+  {
+    r += wavenet_layer_memory(condition_size, channels, bottleneck, kernel_size, dilations[i], gating_modes[i],
+                              groups_input, groups_input_mixin, layer1x1_active, layer1x1_groups, head1x1_active,
+                              head1x1_out_channels, head1x1_groups, conv_pre, conv_post, input_mixin_pre,
+                              input_mixin_post, activation_pre, activation_post, layer1x1_post, head1x1_post, M);
+  }
+
+  // _head_rechannel: Conv1x1(head_output_size -> head_size, bias=head_bias, groups=1)
+  int head_output_size = head1x1_active ? head1x1_out_channels : bottleneck;
+  r += conv1x1_memory(head_output_size, head_size, head_bias, 1, M);
+
+  // Buffers: _layer_outputs(channels, M)
+  r.add_buffers((size_t)channels * M);
+  // _head_inputs(head_output_size, M)
+  r.add_buffers((size_t)head_output_size * M);
+
+  return r;
+}
+
+// Forward declaration for recursive condition_dsp
+static MemoryResult compute_memory(const std::string& architecture, const json& config, int M);
+
+// WaveNet top-level memory: optional condition_dsp sub-model + conditioning buffers + layer arrays + _head_scale.
+static MemoryResult wavenet_memory(const json& config, int M)
+{
+  MemoryResult r;
+  int in_channels = config.value("in_channels", 1);
+
+  // condition_dim = in_channels (from _get_condition_dim())
+  int condition_dim = in_channels;
+
+  // Recursive condition_dsp. A present-but-null "condition_dsp" is treated like a missing key (no condition DSP).
+  bool has_condition_dsp = false;
+  int condition_output_channels = condition_dim;
+  if (config.find("condition_dsp") != config.end() && !config["condition_dsp"].is_null())
+  {
+    has_condition_dsp = true;
+    const json& cdsp = config["condition_dsp"];
+    std::string cdsp_arch = cdsp.at("architecture"); // at(): a missing key throws instead of const operator[] UB
+    json cdsp_config = cdsp.at("config");
+    r += compute_memory(cdsp_arch, cdsp_config, M);
+    // condition_output_channels comes from the condition_dsp's output
+    // For now, we use condition_size from first layer as a proxy
+    // (the actual model validates this match)
+    if (config.find("layers") != config.end() && config["layers"].size() > 0)
+      condition_output_channels = config["layers"][0]["condition_size"];
+  }
+
+  // _condition_input(condition_dim, M)
+  r.add_buffers((size_t)condition_dim * M);
+
+  // _condition_output
+  if (!has_condition_dsp)
+  {
+    // _condition_output(condition_dim, M)
+    r.add_buffers((size_t)condition_dim * M);
+  }
+  else
+  {
+    // _condition_output(condition_output_channels, M)
+    r.add_buffers((size_t)condition_output_channels * M);
+    // _condition_dsp_input_buffers: condition_dim vectors of M doubles/floats
+    // These are std::vector<std::vector<NAM_SAMPLE>> where NAM_SAMPLE is double
+    r.add_buffers((size_t)condition_dim * M * (sizeof(double) / sizeof(float)));
+    // _condition_dsp_output_buffers: condition_output_channels vectors of M doubles
+    r.add_buffers((size_t)condition_output_channels * M * (sizeof(double) / sizeof(float)));
+    // Pointer arrays are negligible
+  }
+
+  // Layer arrays; at() turns a missing "layers" key into a json exception rather than undefined behaviour
+  for (const auto& layer_config : config.at("layers"))
+    r += wavenet_layer_array_memory(layer_config, M);
+
+  // _head_scale (1 float) — it's a weight
+  r.add_weights(1);
+
+  return r;
+}
+
+// ─── Dispatch ───────────────────────────────────────────────────────────────
+// Routes to the estimator for the named architecture; throws std::runtime_error when the name is not recognised.
+static MemoryResult compute_memory(const std::string& architecture, const json& config, int M)
+{
+  if (architecture == "WaveNet")
+    return wavenet_memory(config, M);
+  if (architecture == "LSTM")
+    return lstm_memory(config);
+  if (architecture == "ConvNet")
+    return convnet_memory(config, M);
+  if (architecture == "Linear")
+    return linear_memory(config);
+  throw std::runtime_error("Unknown architecture: " + architecture);
+}
+
+// ─── Formatting helpers ─────────────────────────────────────────────────────
+// Human-readable size: "<n> bytes" below 1024, "%.2f KB" below 1024 * 1024, otherwise "%.2f MB" (1 KB = 1024 bytes).
+static std::string format_bytes(size_t bytes)
+{
+  char buf[64];
+  if (bytes < 1024)
+    snprintf(buf, sizeof(buf), "%zu bytes", bytes);
+  else if (bytes < 1024 * 1024)
+    snprintf(buf, sizeof(buf), "%.2f KB", bytes / 1024.0);
+  else
+    snprintf(buf, sizeof(buf), "%.2f MB", bytes / (1024.0 * 1024.0));
+  return buf;
+}
+// Decimal text of n with a "," inserted before every group of three digits, counted from the right (1234567 -> 1,234,567).
+static std::string format_with_commas(size_t n)
+{
+  std::string s = std::to_string(n);
+  int insert_pos = (int)s.length() - 3;
+  while (insert_pos > 0)
+  {
+    s.insert(insert_pos, ",");
+    insert_pos -= 3;
+  }
+  return s;
+}
+
+// ─── Main ───────────────────────────────────────────────────────────────────
+
+int main(int argc, char* argv[])
+{
+  if (argc < 2)
+  {
+    fprintf(stderr, "Usage: memory_usage <model_path> [--buffer-size N]\n");
+    return 1;
+  }
+
+  const char* model_path = argv[1];
+  int buffer_size = DEFAULT_BUFFER_SIZE;
+
+  for (int i = 2; i < argc; i++)
+  {
+    if (strcmp(argv[i], "--buffer-size") == 0 && i + 1 < argc)
+    {
+      buffer_size = atoi(argv[++i]);
+      if (buffer_size <= 0)
+      {
+        fprintf(stderr, "Error: buffer size must be positive\n");
+        return 1;
+      }
+    }
+    else
+    {
+      fprintf(stderr, "Unknown option: %s\n", argv[i]);
+      return 1;
+    }
+  }
+
+  // Read and parse JSON
+  std::ifstream file(model_path);
+  if (!file.is_open())
+  {
+    fprintf(stderr, "Error: cannot open %s\n", model_path);
+    return 1;
+  }
+
+  json j;
+  try
+  {
+    file >> j;
+  }
+  catch (const std::exception& e)
+  {
+    fprintf(stderr, "Error parsing JSON: %s\n", e.what());
+    return 1;
+  }
+
+  std::string architecture = j["architecture"];
+  json config = j["config"];
+
+  // Cross-check: count weights in JSON
+  size_t json_weight_count = 0;
+  if (j.find("weights") != j.end())
+    json_weight_count = j["weights"].size();
+
+  double sample_rate = -1.0;
+  if (j.find("sample_rate") != j.end())
+    sample_rate = j["sample_rate"];
+
+  try
+  {
+    MemoryResult result = compute_memory(architecture, config, buffer_size);
+    size_t total = result.weight_bytes + result.buffer_bytes;
+
+    printf("Model: %s\n", model_path);
+    printf("Architecture: %s\n", architecture.c_str());
+    if (sample_rate > 0)
+      printf("Sample rate: %.0f Hz\n", sample_rate);
+    printf("\n");
+    printf("Weights:  %s bytes (%s)\n", format_with_commas(result.weight_bytes).c_str(),
+           format_bytes(result.weight_bytes).c_str());
+    printf("Buffers:  %s bytes (%s)  [buffer size: %d]\n", format_with_commas(result.buffer_bytes).c_str(),
+           format_bytes(result.buffer_bytes).c_str(), buffer_size);
+    printf("Total:    %s bytes (%s)\n", format_with_commas(total).c_str(), format_bytes(total).c_str());
+
+    if (json_weight_count > 0)
+    {
+      printf("\nJSON weights: %zu values (%s bytes)\n", json_weight_count,
+             format_with_commas(json_weight_count * sizeof(float)).c_str());
+    }
+  }
+  catch (const std::exception& e)
+  {
+    fprintf(stderr, "Error computing memory: %s\n", e.what());
+    return 1;
+  }
+
+  return 0;
+}

From c4b84f50b30b2573d4ec74a4dc408f2ab80c8678 Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 20 Feb 2026 17:41:55 -0800
Subject: [PATCH 2/4] Add render tool for processing WAV files through .nam
 models

- Add AudioDSPTools as submodule for WAV input (dsp::wav::Load)
- Add tools/render: loads model, reads input WAV, processes, writes 32-bit float output
- Usage: render <model.nam> <input.wav> [output.wav]
- Supports mono input; validates sample rate matches model

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .gitmodules                |   3 +
 Dependencies/AudioDSPTools |   1 +
 tools/CMakeLists.txt       |  30 ++++++-
 tools/render.cpp           | 159 +++++++++++++++++++++++++++++++++++++
 4 files changed, 192 insertions(+), 1 deletion(-)
 create mode 160000 Dependencies/AudioDSPTools
 create mode 100644 tools/render.cpp

diff --git a/.gitmodules b/.gitmodules
index 11c19841..f49ce6e8 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "Dependencies/eigen"]
 	path = Dependencies/eigen
 	url = https://gitlab.com/libeigen/eigen
+[submodule "Dependencies/AudioDSPTools"]
+	path = Dependencies/AudioDSPTools
+	url = https://github.com/sdatkinson/AudioDSPTools.git
diff --git a/Dependencies/AudioDSPTools b/Dependencies/AudioDSPTools
new file mode 160000
index 00000000..0827c6c2
--- /dev/null
+++ b/Dependencies/AudioDSPTools
@@ -0,0 +1 @@
+Subproject commit 0827c6c2fc0deced568536142ea86f189e0b98a1
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 8f02f20e..bbe93f3e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -4,14 +4,42 @@ file(GLOB_RECURSE NAM_SOURCES ../NAM/*.cpp ../NAM/*.c ../NAM*.h)
 set(TOOLS benchmodel)
 
 add_custom_target(tools ALL
-	DEPENDS ${TOOLS})
+	DEPENDS ${TOOLS} render)
+
+set(AUDIO_DSP_TOOLS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../Dependencies/AudioDSPTools")
+set(AUDIO_DSP_TOOLS_WAV_SOURCES "${AUDIO_DSP_TOOLS_DIR}/dsp/wav.cpp")
 
 include_directories(tools ..)
 include_directories(tools ${NAM_DEPS_PATH}/eigen)
 include_directories(tools ${NAM_DEPS_PATH}/nlohmann)
+include_directories(tools ${AUDIO_DSP_TOOLS_DIR}/dsp)
 
 add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES})
 add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES})
+add_executable(render render.cpp ${NAM_SOURCES} ${AUDIO_DSP_TOOLS_WAV_SOURCES})
+target_compile_features(render PUBLIC cxx_std_20)
+# AudioDSPTools wav.cpp triggers sign-compare warnings; -Wno-error keeps them from failing the -Werror (Debug) build
+set_source_files_properties(${AUDIO_DSP_TOOLS_WAV_SOURCES} PROPERTIES COMPILE_FLAGS "-Wno-error")
+set_target_properties(render PROPERTIES
+	CXX_VISIBILITY_PRESET hidden
+	INTERPROCEDURAL_OPTIMIZATION TRUE
+	PREFIX ""
+)
+if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
+	target_compile_definitions(render PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN)
+endif()
+if (MSVC)
+	target_compile_options(render PRIVATE
+		"$<$<CONFIG:DEBUG>:/W4>"
+		"$<$<CONFIG:RELEASE>:/O2>"
+	)
+else()
+	target_compile_options(render PRIVATE
+		-Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter
+		"$<$<CONFIG:DEBUG>:-Og;-ggdb;-Werror>"
+		"$<$<CONFIG:RELEASE>:-Ofast>"
+	)
+endif()
 add_executable(memory_usage memory_usage.cpp)
 add_executable(run_tests run_tests.cpp test/allocation_tracking.cpp ${NAM_SOURCES})
 # Compile run_tests without optimizations to ensure allocation tracking works correctly
diff --git a/tools/render.cpp b/tools/render.cpp
new file mode 100644
index 00000000..77836b41
--- /dev/null
+++ b/tools/render.cpp
@@ -0,0 +1,159 @@
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+#include "NAM/dsp.h"
+#include "NAM/get_dsp.h"
+#include "wav.h"
+
+namespace
+{
+// Write a mono 32-bit float WAV (format tag 3). Logs and returns false if the file cannot be opened, else out.good().
+bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSamples, double sampleRate)
+{
+  std::ofstream out(fileName, std::ios::binary);
+  if (!out.is_open())
+  {
+    std::cerr << "Error: Failed to open output file " << fileName << "\n";
+    return false;
+  }
+  // NOTE(review): sizes are 32-bit; a numSamples * sizeof(float) above UINT32_MAX is silently truncated by the cast.
+  const uint32_t dataSize = static_cast<uint32_t>(numSamples * sizeof(float));
+  const uint32_t chunkSize = 36 + dataSize;
+
+  // RIFF header; 36 = "WAVE" (4) + fmt chunk (8 + 16) + data chunk header (8). Fields are written in host byte order.
+  out.write("RIFF", 4);
+  out.write(reinterpret_cast<const char*>(&chunkSize), 4);
+  out.write("WAVE", 4);
+
+  // fmt chunk (16 bytes for PCM/IEEE)
+  const uint32_t fmtSize = 16;
+  out.write("fmt ", 4);
+  out.write(reinterpret_cast<const char*>(&fmtSize), 4);
+  const uint16_t audioFormat = 3;  // IEEE float
+  out.write(reinterpret_cast<const char*>(&audioFormat), 2);
+  const uint16_t numChannels = 1;
+  out.write(reinterpret_cast<const char*>(&numChannels), 2);
+  const uint32_t sr = static_cast<uint32_t>(sampleRate);
+  out.write(reinterpret_cast<const char*>(&sr), 4);
+  const uint32_t byteRate = sr * sizeof(float);
+  out.write(reinterpret_cast<const char*>(&byteRate), 4);
+  const uint16_t blockAlign = sizeof(float);
+  out.write(reinterpret_cast<const char*>(&blockAlign), 2);
+  const uint16_t bitsPerSample = 32;
+  out.write(reinterpret_cast<const char*>(&bitsPerSample), 2);
+
+  // data chunk: 4-byte payload size followed by the raw float samples
+  out.write("data", 4);
+  out.write(reinterpret_cast<const char*>(&dataSize), 4);
+  out.write(reinterpret_cast<const char*>(samples), dataSize);
+
+  return out.good();
+}
+
+}  // namespace
+// Entry point: render <model.nam> <input.wav> [output.wav]. Runs the WAV through the model and writes a float32 WAV.
+int main(int argc, char* argv[])
+{
+  if (argc < 3 || argc > 4)
+  {
+    std::cerr << "Usage: render <model.nam> <input.wav> [output.wav]\n";
+    return 1;
+  }
+  // The output path is optional and defaults to "output.wav" in the current directory
+  const char* modelPath = argv[1];
+  const char* inputPath = argv[2];
+  const char* outputPath = (argc >= 4) ? argv[3] : "output.wav";
+
+  std::cerr << "Loading model [" << modelPath << "]\n";
+  auto model = nam::get_dsp(std::filesystem::path(modelPath));
+  if (!model)
+  {
+    std::cerr << "Failed to load model\n";
+    return 1;
+  }
+  std::cerr << "Model loaded successfully\n";
+  // Presumably Load fills inputAudio with the samples and inputSampleRate with the file's rate - confirm in wav.h
+  std::vector<float> inputAudio;
+  double inputSampleRate = 0.0;
+  auto loadResult = dsp::wav::Load(inputPath, inputAudio, inputSampleRate);
+  if (loadResult != dsp::wav::LoadReturnCode::SUCCESS)
+  {
+    std::cerr << "Failed to load input WAV: " << dsp::wav::GetMsgForLoadReturnCode(loadResult) << "\n";
+    return 1;
+  }
+  // Refuse to render when the model reports a rate (> 0) that differs from the WAV's rate by more than 0.5 Hz
+  const double expectedRate = model->GetExpectedSampleRate();
+  if (expectedRate > 0 && std::abs(inputSampleRate - expectedRate) > 0.5)
+  {
+    std::cerr << "Error: Input WAV sample rate (" << inputSampleRate
+              << " Hz) does not match model expected rate (" << expectedRate << " Hz)\n";
+    return 1;
+  }
+  // Use the WAV's rate when the model reports none (<= 0); audio is processed in fixed blocks of 64 samples
+  const double sampleRate = expectedRate > 0 ? expectedRate : inputSampleRate;
+  const int bufferSize = 64;
+  model->Reset(sampleRate, bufferSize);
+
+  const int inChannels = model->NumInputChannels();
+  const int outChannels = model->NumOutputChannels();
+
+  if (inChannels != 1)
+  {
+    std::cerr << "Error: render tool currently supports mono input only (model has " << inChannels
+              << " input channels)\n";
+    return 1;
+  }
+  // One bufferSize-long buffer per channel, plus arrays of raw pointers into them to hand to process()
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(inChannels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(outChannels);
+  std::vector<NAM_SAMPLE*> inputPtrs(inChannels);
+  std::vector<NAM_SAMPLE*> outputPtrs(outChannels);
+
+  for (int ch = 0; ch < inChannels; ch++)
+  {
+    inputBuffers[ch].resize(bufferSize, 0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < outChannels; ch++)
+  {
+    outputBuffers[ch].resize(bufferSize, 0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
+  }
+
+  std::vector<float> outputAudio;
+  outputAudio.reserve(static_cast<size_t>(outChannels) * inputAudio.size());
+
+  size_t readPos = 0;
+  const size_t totalSamples = inputAudio.size();
+  // Stream the input block by block; a short final block is zero-padded, but only toRead samples are processed and kept
+  while (readPos < totalSamples)
+  {
+    const size_t toRead = std::min(static_cast<size_t>(bufferSize), totalSamples - readPos);
+
+    for (size_t i = 0; i < toRead; i++)
+      inputBuffers[0][i] = static_cast<NAM_SAMPLE>(inputAudio[readPos + i]);
+    for (size_t i = toRead; i < static_cast<size_t>(bufferSize); i++)
+      inputBuffers[0][i] = 0;
+
+    model->process(inputPtrs.data(), outputPtrs.data(), static_cast<int>(toRead));
+    // Only output channel 0 is collected into the rendered file
+    for (size_t i = 0; i < toRead; i++)
+      outputAudio.push_back(static_cast<float>(outputBuffers[0][i]));
+
+    readPos += toRead;
+  }
+
+  if (!SaveWavFloat32(outputPath, outputAudio.data(), outputAudio.size(), sampleRate))
+  {
+    return 1;
+  }
+
+  std::cerr << "Wrote " << outputAudio.size() << " samples to " << outputPath << "\n";
+  return 0;
+}

From 4ec410e5cbbac54b123ddddd36bc621af9b189bd Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 20 Feb 2026 17:42:37 -0800
Subject: [PATCH 3/4] Add use of render tool in tests

---
 .github/workflows/build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 62f2a7e2..929263b1 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -41,3 +41,4 @@ jobs:
         ./build/tools/run_tests
         ./build/tools/benchmodel ./example_models/wavenet.nam
         ./build/tools/benchmodel ./example_models/lstm.nam
+        ./build/tools/render ./example_models/wavenet.nam ./example_models/wavenet.nam

From c001c71ec505c9d13e0ccf3e6559e9b889ff7ddf Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 20 Feb 2026 17:48:16 -0800
Subject: [PATCH 4/4] Example input audio, fix render tool test

---
 .github/workflows/build.yml |   2 +-
 .gitignore                  |   2 ++
 example_audio/input.wav     | Bin 0 -> 288044 bytes
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 example_audio/input.wav

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 929263b1..83c452fb 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -41,4 +41,4 @@ jobs:
         ./build/tools/run_tests
         ./build/tools/benchmodel ./example_models/wavenet.nam
         ./build/tools/benchmodel ./example_models/lstm.nam
-        ./build/tools/render ./example_models/wavenet.nam ./example_models/wavenet.nam
+        ./build/tools/render ./example_models/wavenet.nam ./example_audio/input.wav ./example_audio/output.wav
diff --git a/.gitignore b/.gitignore
index b7ee58e6..34ee36ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,5 @@
 
 docs/_build/
 *.DS_Store
+
+example_audio/output.wav
diff --git a/example_audio/input.wav b/example_audio/input.wav
new file mode 100644
index 0000000000000000000000000000000000000000..fd0302bd119965cb185437a0fc42dfda94193cd4
GIT binary patch
literal 288044
zcmeI5`FBqD`oEuqm_x*zMk0|&A~Mg@v+F2I)mZb;(gCH$qN?Vqd9G7VOIt-zV_Q{2
zbw8Pjh)5#Fgv3llVh)kd<v;lT`d#aF&RShv8qc%$wcqdie%-dcuijlcb@KYSx}|^n
zejPvg?DJMm7K_D>|IayMvCIjzw^Xw<w|q4G^WheLM*kB55fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo
z0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z
z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo
z0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z
z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo
z0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z
z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo
z0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z
z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo
z0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z
z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p
z5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5CIVo0TB=Z5fA|p5P|<k0*U!{9($`DT;Sj_
zxaNz%+C`SSpP#PRZDakqBb~YjIR91J=<4n!lSejlbZyZ)!{z4Zt`nQPtv}#it((Wt
zi=Mqc^jcc%eR#0XiVWYsL)+e8<$vvZyIvuIWs`zt?hHPVA2OgK)XzEmu3N;&R*@e!
zjr!dZ?NS&$VSmiRX|drUaXky-4$g}2<eYGMeZmN5>!DfJjLX&qK}OzWW7kf@l53=v
z8Woks`d7x!Wk%C{qjHb&!E~cbq>*~vdS{_E&DHw(mV|ol2|Jd>k1dIt+CJ{yqS(ht
zF^9{eM>UBKY8PdViaZ$^;piV8-#Bbdc}Vxv;Jqt@o^}r$Q$3*PKL66*ZCBs({d%TP
z;~H(&eB;%l%5&ockDLqc9j$KNzIT1I++{|G7K2tci$2?Aa`VPx`Z>>B<y0@N{<MmE
z9{zR54y=7-LCuN#9LmpE+iY)tt*yna&#OOXysWXcVrJH}FJC+v?)P}zz=zY9m5oa*
zJ^P}hVa&bRU*4Hz-ukTW&FzD)FWp}}(XnX%_$%Aa6>NF?vc<gEz_{R<dTvqQ{M99S
zi&o_R9i21!UUu=0)1xP5Sv#EC?4FrZ<D}#B6N{c4pIMde);Rsg@U+0;sn0f~_!TDK
z3{LL9GU<nk#Jov~?$2!Nme}5HYa5(z=Bzet1I@4?GsM}n*_$^k=H_~4nYWqM%?zDu
zIwhHdoNadFZ4b`b8um==nwvQ1<D|9Kl1J=GKK^ma&wi=q^VEvGG>>EH)qX!dX8#HA
zqZ!Y#Pi`;IeAY5+d!N%^tj+ekd}c&&?)_zXv&zpVe{k;e{PWg+7f)Zhbnepvi{;9r
z@2?KA78l&O=CSU^iMMafu)A~a*xk8vN}6{njc-u4^kVteosWFJda|d-vwW`#>x&n&
z&s0Y3tg_6r`(j|V*kFfZhnkKTYW3SyXUJFe4tJ^Fu(4C+g@*2%8Z{f*<eO&A>Ss3Z
zJ=5ihkLyn-TR#2NeY3qsx3!*|T)jGN@-A!U({r(JqjJB}Uj7@lw@ZH<5D^pf{iNVS
zKZm@V8rtGYSn>Vv3FQ%s??gU78@2Law9B%Xaqq=`;1rksZ`_;_@gtwb51pIvR%L?y
zSnKRe>vj)g^eE%N&&I4w<LF&u)JtPll~MJ`xO>Uya?oh?wK1cEG5WFf*#>Juh_%O&
zgpSb(@!RA3)Q$gbc-+X}V)x#P+0`Oui8Xp!|EMh=MRxooV%3oFW1Yil`G)SU48EHl
zRDDTceEWd&_x(NBwjCbmS7iD;?AoU2q?di?)>{sFxOulqUhGzS)AgzGM&z0nwF;Z1
zw`rO?y7AKO&hHdEO?7E7w!7oX*>&6RseS!I%>suStAeTz8(_bCj-_b(t6|wM7gki*
zyq-_!`Sj_`$A@-5Y<{I|T9XIS-R~b-a&KtHojFdo=MB1P?zq10Rq>F4MdJ=%@$xLp
zU3EFP#-*uWU#MT_{N3;JhXkJ8otf)DHOJNW%<<yW({^NyoPO$JpUj3aCp-FO?DIOY
z)c<&GYx>Y3X@7r{+UcK^P)o|z9?40+CGBsVw0ue86vxD`H`=meZ4<AX(>9vrBh32|
z=9H%9^6F-+#hh8&?Ayvb`j)wKs@e1}v&`P+Kh(BA*)}dFvGb9{)?Jf|t|xuCB6(hi
z6c>k7yPVXzyVL5fO+WSZanE@t&Me8;vgzcoe=<{Uochk?^tS`Dr~h=uQl4|QTVBq-
zvx{4vJGkb2eW#1@>n@dg6r4FwxT@#XGi61_hHEoB-3WPpvufY%C7<1$6j(Cw;r+rR
z4>m6>Z{P3HAfG2oE1r(edcI}ri<GY_$G=w<-_GttwQ2`0RA0ZZMuP>l4)(7T;9IZ3
z6UXOfgJp{ve%!55n|e*gr!_5_+1xY8CFjB$|C`g&t(AL?L#<wR_1u})`kw*bMLBJb
zck*q$-|rV^|5u;3yL%|0byeVq*x(Pxgxpvix_funjAP-=k|Vkti@dZeYU9f2p`XOu
z3y$?Hjm`ZjuApPQYeD?D(Fq4{CR`j~O*(2F*vMEm#CT`Du_V>7-ZVy37-dz)ou@|k
zBBS-c#`qORLQms=)s6rDW__!p^>B7V!(Is=CdFs@#!s0WH$5}f-Z3`%&6rLjqX*B6
z+P*q+*t&=V%fqjI8Rj)K^w02+Y`frN$$|gO3}_V4u69n_o8$eSSMy!IvQ3$@_tX`w
z$J=`bjc?_c=k|Vh%UR3cNWRiyX<+l<X-%DuG+tcc{34=Z>SqnQZg#w#Q`ezd9lPLK
z?FZNJSx~*%9{VY0Ex*2c)xf8+Vb2%NUp}9@_32M%9+%ra$_^{vIO@TnpYDHs>D~vf
zcYpifcJhv!HJ@G2e&^cEtwkR>TrC(=*x~%;Tkl@-IC0_4?&tq|A%D{NvmTClC4c02
zesspcE&I~dtUG&8O<0t<Ys|?@12ba!oOsdy_<&LA^S(~ou`BiEm6WX>DW}FI|B#*(
z7L@eI=ET_6iADQtsqfgHJv3MBFxQPWT@y_IH_VJ0rp5C5pZq(&!msmR`5AtapXYb+
zd-()Dhfm`(`D8wyZ{fT6M!uJC=NLE=j)$Y-*f>ItlcVLBIdYDlv*27fBhHJn;~Y6t
z&X=?1+#vu5Xut$A@PQJnAO<(+!4Q)0geq(y3}<M=9P;qzDsU~hB3u`)4%dh)#r5K<
zaqYN*Tt}`Z*OV*E_2nvat-0b{cdkBbfR(^{U{$a-SRt$vRtsx}mBadB6|t6BQLHOg
z7i)}_#(HDbvG!PjtV32KYm$}8`ec=|R#~yETUIY?n3c?WW>vGcS>dd6Ry%8+mCyQT
z7qA!D5$p?g2YZB_!hT`buy@!&>?3v)dy1XK{$iK0*Vu9FJ9Zy?ke$eWWLL5`*`e%H
zb}M_9oy-1Z7qgey(d=t>H+!6&&VFatv-hb0)B&miHG#@NeV|HEE2tRMjn~!VKp~Zc
zdO}s9woqZHGgKRD4wZ-cLlvSHQIV)iR3~Z_m5O>r)uMJ$!Kh<YGin-@jrvBFqt;RJ
zsC!gDY9N)6dPr5IHc}y}lT=G;CY6)=Nfo7*Qc<a^R99*&m6m!-)ur}QfvLk(V`?&$
znfgqXrdCt2soPX<YB-ggdQMfRwo~D$^Hh6kK9!&PPZyvU&=KehbO(9_oq~Qr*PwUM
zLFglN6M71rh5ka9q1VuH=sR>DdJvt6eneNIH_@T!Q*<kO7M+X!MHi!&(b4E@bT@k3
z>rQ9-(DmqjbU^wb-H@J0XQV&UCFzxPO!_9>lO9SZrJvGO>8*5F`Yhd+o=fMY|I&r&
z#dKu)GToUTO{b<`)3xc{ba47O-JG6IXQ#i@<>~cweEL4!pA0|}AP<lV$Oa??asp|A
z%s_G=Kae8G5+n+81?hr}LDC>^kUGd7BoJ~4X@pEdG9jOkQphSK7IF*eg$zTIA<vL%
z$TlP#at>*S%tP`a|B!;nLL?$`5$TAGL{cIzk($U(Bq(weX^KomvLauRvdCH_E^-&?
ziws5*Bae~F$Yvz8*Ex-}mVJ`sMt&p3k>yBq<T}zF8IPn#-Xryq{YZf1K++(YkYq?c
zBqfp+NsQ!1(jys?BuSnmRgx`9nB+{-CYh7uN&X~-l0`|R<WkZp8I`0;UM01XT}iOy
zSkmn4OnWbzd`rrGopt->lY2?OWMGmod6-m8HYOpHlS#{DW|A}cnG{WyCQ*~CN!Mg-
zk~Vpp)J^s#fs?~Y<79G@Ir*HFPF5$eliNw}WO$N1d7e~HwkP3}^GW+;ev&`=pDBP@
zfQf*)fa!o4fk}aRfvJJnfeC^+f@y-8g2{sUf+>SpgNcK=gXx1Agh_;XgsFttgb9T?
zg=vMEg~^5ag(-$vhKYu`hUtbGhe?NdhpC6zhY5%|h-rwKh{=fgh$)F#iHV81iRp<M
zib;xjim8g(iV2H3i)o9Qi^+@miz$p*jERi7jOmOSjY*Apjj4^<jR}r9j%kjWj>(Ss
zjwz2>kBN`D@AdSzw;GcmGo#h_RbkJUUB0X+-15vJ`-xlCqh&!4{~S^Fczx;bXG*#{
z-E;4KXZiYD4~uW?3BTTAY4OUNS4VZdQu#;0FJ71T{%~=M>xE8R&P@u>e>*eJd3>&`
zOU~X?*$tMTp5Hg?9p6*i9WwJvGMZdHv7zwz_qWr#SEb!(mA0yT>hwh^Cr>5s^GNP8
zH|cOmV(XEKn@Vh*=h`AXZ8J`r{>#kw`kIz@W~`H0!`>WaXTD$GyyI(D>t+7*jd}36
z8S7$e`h_i~*tTqN;-f2x{U;}V>zusgSaRcODRtvhJ2|GUzmt}olb)D<JpA~Hhq)Qa
zcTbLUJoS4_R<8-CYwyoq_u|aH9=Q#7=Z$Za|JA&6_nx00H~r#%%jFMO76f=-Ibyr|
z=SRh+<MjncZ)}@zE8XwT@|$<p?I@W)zVuOa*<y!>g}INu-S(v8m(N!AsHorO<*>?1
z*SxBtU3Tw&U9Dn}!~Ecy-&C*t<Jmf6w$)oWx&E32r?%CdOH&)oUC?Bn(QNIb<^#64
zOzG~rvb<%FRqlg)Jcb<fw7lt6W_lOI_<XU`cf51kFDClGo!0KVMu9E+1%0<7IPqx6
zuq&Yf&%++pi1?y*<a|q1(cP%w8PR7p#nk#FHosL|i>$aE6XHK|Nbp~taJ8m&%oOV%
zdDgAIM!-13*k)v&HnK_#qtbAxGKM`g8eA|!{xD9>G9JVm+e)letE^5w*3!KRfB7fO
z`Ct5JFXJ}#ii`SR?E7b9-mDQ*qiu9y`>4uyBiD70xY!}QR$$lw$Ix0u!2@;$EgBnG
z;2E$f%ino&+j)+D@jv=3^KFy3!z;LTYu|Mq7E7yoAGvL@xq7yEqwv!fkq4VCc-6$W
zedF1)oo^j-N`6p(ikIWyzID&fuiblJO}~o{iPfvm4X{7n*J7FVYSFfrsaX{*E1o^{
ze!8>I<1O<acKWNV^loW2@B3GW-@Co}&X>1tm51KkGw*txbHyKo71dsMWmsjwpJOkN
zD86`j_=N_y&h49;U*dSS;hx-){d1!0oXJQ%{qgdw&cjaq5tDh-{bb9A8LJ#mOlxwy
zreFHRx6^J+PF?d`%1@=q(QhWt*_gDUX3{US5|7#^zFKRW8)_R+V1D<V`OYwNUa0w2
zV{?5qbDqVtt7|rCZ618vyfVXFaMb*+j;+=wwxm<G{cj~6NlQ%bpS1f?Qr}I<8{SLl
z)hzYB8>v%mX^Xd~5BdK1kqsv%{F3pXzfV5P&+KEL)g<cl%PHB($IkdP%q<w3_ha^1
zH{+b+f%9%*7hnB-X-KDnq6>xbQ?E8`UR>L}cH*-e&syDzK7YI2fA3D|Uvj{;bjkGx
z-|jD;IOoy7y`H?`{mirC`GnIiHvd}LXLglaf4j|r)uuZ*7#C}-+FPskygHqG*SqCj
zfAO6L>H8aYp3>--@FsIgo6g$W{LOwYW9(eFY-_ouqx+@Ht=f(EZ1=qNk{RCRkJ~gE
z<+~-@uTQ9d-r{!G@&lq=gBJG--tbL`_pZ>=)UYFG!+Tzgn35a$)E4#2FVSyLk2(CG
z*i%)pgZITHycgf&Zv5ma2}{cpz8YZ-O0lkVF&2+7>TfihPZ&#Y8<Srcsa3}OC&q7u
z##={>ev6GeU5o~m*1XNu9&y$+sR@VPO1OL=-pwU`<HWdGM`LeP#8kD5dE7ht@z|))
z8Ic7uBLb&{4;db|`9Gm?EkZ_L4;uSx;KKI<EcM&D|Is#}hu^d!pDv%ZDKGU({<L+&
z0*}D1ttRYs3#{ML_Q@Ob54Fgu(cG>_)2VA3ugG@(wO&Jyw;Bxl!tvi7b+?_XJ;kBc
z>fjnf233!nZ@>CCOLESu$d@nQ@qRI?$Mdo;pLW^)I4A#MS?%(g#)HqN-T(fNdlw$w
zSsi`bZQjj-)7JyMuYEGRsHpHtMW@2*2QK$(d#S^(7u?#Nk2;ipx%=6nCAs$Na+-8H
zbJ>!;DJ5&;x2IgjW%lWPazXoy+i@ojwm<IGC;h_-X}@epZJd=-;FQwt{bbL>N!L7*
z*8DfIOY=nUUu_8;Y`@+!KmWx%Fw&eFV<t8?H#wLMEN0VM=I<@d%Ws;iCz?rn%}-vK
zpT1{1df3(}FtKz`;>P%-ko=^jvyvBvro1XoN&h$X@cOi^Gt&o*JHGkT6YosQa9wcn
z(&o%=8K;6?W&I~EyTiOQC1-M;w##d=>TJ!I`I9G{Z%}e!$;3;mD=%lSDty@X>dzTP
z^Cn%3cfT<u`{wCIw;#N9cbY?q!-@N`YaaYHw7glnM=xGJuF87)@0RE1zkG4KSLIT_
zs)2TPW#{Zm_g25ZpvICxwLS=_Q~y=npp%Xte%GM$`whQp-RP&{#(jTm8sD$^^O`O}
zf4(t)P|F`G+&sT+^-mkm=D)WtjPN%1xB1lDcf~5d!t%C_y0t6#KH$aGKrgr8raePe
zO$&YR+c2+f;c2@f9JWVp|1N6QjOhM-Vye5x4laldT^;vPRD9;?_;-gUq+UvJ>tT)k
z(^{*ZF{z(1X0358(a5`IL_9YvmN)q)|IV-Q>-<-KhM(l;`Ca^8K7r5S)A&q2na}52
z_%6PY@8#P$29AW|;ixz^j*#QzXgOw%oa5&#I2X={^Wy9{N6wV<<*Ye(2*3dvFo6tw
zpad(3!3}yagd{wn3R?)n8QL(1Jp8!|TnnxU*M+OYHR4Kfy|`*zJFXztk*moy<;rq>
zxyoE?t~l47tIrx>C9ob?6|4<b2<wE^!kS^_uzpxYtR+?y>x$LI8e^rg-dJ_4Jysy=
zkk!bVWM#5GS*5I1RxIn5)yo=YC9|Gc)vRq+IP09%&YEZCv;Nry>;-lN`-0uU9$}}j
zU)VM59d;1=h~31VVrQ|x*k$ZBb{zYT-NzneC$b;emF!J+DEpM%%ARHCvVYmd>}7T|
z`<mU&9%rYs-`Vx-eJTKTfNDTZpfXS&s1no)Dh74qb@k}lib_H~p{h_@s4&zSstq-V
z%0vC33Q>!wNYo{&6E%uTMZKbGQM;&M)G?|VHI2$feWS`z>!^6tJ*poykV;5Bq$*My
zsgTr3swFj(%1QmCic(9dsMJ-eD>ar%OTDG)QhTYu)M2VIHJQpxeWprNtEt%3ZK^jl
zoJvkTr>axisqoZ!sy#KI%1`~L3(yPb2=oQI13iLHLBF7D&^zcL^bxuVJ%!Fff1%6J
zYv?%i9l8%ah)zU5qASsx=uq@2x)nW(&PD&Ci_y#IX!JF@8$IrIr!&sc_2_+cK>8rv
zke*0qq(9On>6LU$`X=3z9!e*rpVC$7t#nxWEZvr#OXsEk(uL{8bY%K6-I*Rur>0-i
zwdvh-aQZmioSsf+r@zzX>GgDc`aa#C3_ubf50DDT1|$S>0%?KFKyn~IkRr$uBnol`
z>4J<w(jae;I>;U*5ON4<giJy*A)k;^$SNcjatrB&3`3G3&yZ@!HY6N!4rzzXL-Ha2
zkb=lUBqDMV>4=O(QX(&rn#fKhC~_2OicCeaB43fR$XX;Wau?}~3`P<okCDpAW+b%N
zIgPXy7)Ej<zmej|awIx(9qEpYN75tjk^0DfBtUW?X^>1vG9(|863L1rMsg$Rkqk+a
zBu|nm$(AHcawch$%t`Vjf09DUq9jssDe07qN>U}Sl3K~GBv^7RY4&xdZ4^kpCFQ=(
zx;uTzy`*0<FiDs^Oe!WDlaR^Dq-8QQ$(j62iY7~wsL9o&Yce)To4ifxCVP{>$>F4N
zGC9edd`?OytCQHt?WA`yJV~BBPpT)|lkmy;q<u0!$)Ehs6u>ONM8I6Ybij<jq`<tu
z)WGb(1i>7^G{H>4WWjvFl)<dQ#KGLb^uY|mB*HwxRKje+gu<M{w8G57<ih;I6vHgT
zM8jOebi<6pq{F<!)Wht<1jHP~G{j8AWW;>Ll*FvW#Khdh^u!FsB*i?%RK;w?gvFf2
zw8hND<i-5O6viyZM8;gkbjFOvq{h6))W+<_1jii5G{;QGWXF8Rl*g>c#K+wCdiv}6
zl1Y$xQ1=IQe^B=ab$?Lz2X%i?_Xl-<Q1=IQe^B=ab$?Lz2X%i?_Xl-<Q1=IQe^B=a
zb$?Lz2X%i?_Xl-<Q1=IQe^B=ab$?LL59;|rJwK@D2lf1*o*&fngL-~Y&kySPK|MdH
z=Lhxtpq?Mp^MiVRP|pwQ`9VEDsOJau{Ggs6)boRSeo)U3>iI!EKd9#i^?p&kUsUfG
z)%!*Deo?(&RPPtn`$hGBQN3SO?-$kkMfHABy<b%C7uEYk^?p&kUsUfG)%!*Deo?(&
zRPPtn`$hGBQN3SO?-$kkMdcrqe-Imv{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}R
zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(<p1ix}A|^5dpazgwbR3Zp0Nk6Ab^
zHasM*XF=S-S@E5m6E3e$7~yO^G|QTC*}5Re$eV2J+G$vFjnq=3qS9FZ%3!{3nr~F@
zF+P}Xbcr-luUqddw5GXQKi`tT-`d@=EPiZB+|>4Q_ZG!IPKr5P9zCi_G;aa2Mn#^C
zjBxZ1k8d2draYv3YVh8bL0Cn`R1fI6&%d;H+tv4czn<wM|DgPX@(;>CDF2}RgYpl`
zKPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK
z56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}R
zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRo
zp!|dK56V9%|DgPX@(;>CDF2}RgB_z2<R6rOQ2s&r2jw4>e^CBG`3L16lz&kELHP&e
zAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16lz*^c%)QxP-kD|I`mFBF?Sro`-CsP>
zv1tGJE8ET$Y<c^##k|<SxZs(3Zc*R-)g^h0R^<L2oiqAgcJYqWqbFurJDl3=p2?rF
zI6gnI=*jV!Rq1Yx(|-(43ml&MY(t7)VKRSnyZ_3hA1V^_CMCK*v#ncVd$+A^aK6c#
zplk!pupl$U*|gc4H!SAndS;ooiCHFeuIZFy4sy2Hjki5GXOn+W{z3T%<sX!PQ2s&r
z2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16lz&kE
zLHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!P
zQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16
zlz&kELHP&eAC!Mk{z3T%<sX!PQ2xQKJ(A@glz&kELHP&eAC!Mk{z3T%<sX!PQ2s&r
z2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!PaE9HTbI0z^om0}hQ)zsIvZWWx
zx9)u8^VO3*J)Y%zRajrVn0=-)YG;*Yp4}G%tHlO86g$*(yilv(t~x`$s&}|c{f3R5
z_;WV*O^uojZSqaCX7w|h_nztU#K-lglP#Zq>dxQZ@3z)+ldD&!P2OeAe0nbSZB*`8
z+RLA}fu%nVh=>XLep2wEpF`eF4Q+8HtoVL7cA>?0BA=g)T6r+qWm(L)_hRKAlz&kE
zLHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!P
zQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16
zlz&kELHP&eAC!Mk{z3T%<sX!PQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Mk{z3T%
z<sX!PQ2s&r2jw4>e^CBG`3L16lz&kELHP&eAC!Nv#-Ud756V9%|DgPX@(;>CDF2}R
zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`Ke*+i$c~>xtQr!2
ztaDf`-_YHa!FSVxsxJwQZy#{}zQ5<%w!;JcicFt}UE36$^s?{VddndXH}6)-i``0Z
zx;{1Dh+NZxKWj~I(=>N<<E7i3-zj#Q>e66rcgK~p>$cxhn<qF59BQlzsy=Lh{q8xI
zqV2DSWxrflQNbJGCiHyzbmrqjyB{{cQZ}v0gXr$}4=uTeVQP-k?RkT4nmewqdsRGS
zV3GWT@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`
zKPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK
z56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}R
hgYpl`KPdm8{Dblj%0DRop!|dK56VCI|EYiQ{{ZfUN{0Xd

literal 0
HcmV?d00001