From 3e4bc05a2ed0c4f0e6da4c4739e50d97f9d70c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Felipe=20Santos?= <5733+jfsantos@users.noreply.github.com>
Date: Fri, 13 Feb 2026 16:54:22 -0800
Subject: [PATCH 1/4] Add profiling instrumentation for NAM building blocks (#219)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add profiling instrumentation for NAM building blocks

Adds a profiling framework (NAM/profiling.h, NAM/profiling.cpp) with
NAM_PROFILE_START()/NAM_PROFILE_ADD() macros and 14 timing categories.
Supports both desktop (std::chrono) and ARM Cortex-M7 (DWT cycle counter)
backends. Profiling is compile-time gated via -DNAM_PROFILING.

Instruments wavenet _Layer::Process() and _LayerArray::ProcessInner() with
per-category timing, and adds profiling reset/print calls to the benchmodel
tool.

Co-Authored-By: Claude Opus 4.6

* Fixed build flags for benchmodel

* Added a command line tool to output memory usage for a given .nam file

* Bugfix - checking that condition_dsp is not null in the JSON (#220)

* [BUGFIX, BREAKING] Make activation base class abstract, fix PReLU implementation (#223)

* Make activation apply method pure virtual instead of no-op default

* Fix bugs

* Refactor to throw std::invalid_argument in debug mode, add tests

* Add TONE3000 support note in README.md (#224)

* Replace hardcoded profiling struct with dynamic registry

The Timings struct hardcoded 14 named fields, requiring manual updates to
reset(), total(), print_results(), and every call site whenever a category
was added or removed. Replace with a flat-array registry where types are
registered at file scope via register_type(), returning an integer index for
O(1) accumulation in the hot path.

Also adds NAM_PROFILE_RESTART() macro to replace a raw #ifdef block in
wavenet.cpp.
--------- Co-authored-by: João Felipe Santos Co-authored-by: Claude Opus 4.6 Co-authored-by: Steven Atkinson --- NAM/conv1d.cpp | 4 + NAM/dsp.cpp | 4 + NAM/film.h | 4 + NAM/profiling.cpp | 88 ++++++ NAM/profiling.h | 85 ++++++ NAM/wavenet.cpp | 31 +++ tools/CMakeLists.txt | 6 +- tools/benchmodel.cpp | 7 + tools/memory_usage.cpp | 611 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 839 insertions(+), 1 deletion(-) create mode 100644 NAM/profiling.cpp create mode 100644 NAM/profiling.h create mode 100644 tools/memory_usage.cpp diff --git a/NAM/conv1d.cpp b/NAM/conv1d.cpp index 9bbbc020..d440f0c1 100644 --- a/NAM/conv1d.cpp +++ b/NAM/conv1d.cpp @@ -1,4 +1,5 @@ #include "conv1d.h" +#include "profiling.h" #include namespace nam @@ -143,6 +144,9 @@ void Conv1D::SetMaxBufferSize(const int maxBufferSize) void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames) { + // Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp) + // to avoid double-counting when Conv1D is called from within profiled blocks. + // Write input to ring buffer _input_buffer.Write(input, num_frames); diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp index 05dab09d..b644af31 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -8,6 +8,7 @@ #include #include "dsp.h" +#include "profiling.h" #include "registry.h" #define tanh_impl_ std::tanh @@ -443,6 +444,9 @@ Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int nu void nam::Conv1x1::process_(const Eigen::Ref& input, const int num_frames) { + // Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp) + // to provide meaningful categories (input_mixin, layer1x1, head1x1, rechannel) + // rather than generic conv1x1. 
// =============================================================================
// NAM/profiling.h
// =============================================================================
#pragma once

// Dynamic profiling registry for NAM building blocks
// Enable with -DNAM_PROFILING
//
// Usage:
//   1. Register profiling types at file scope (static init):
//        static int PROF_FOO = nam::profiling::register_type("Foo");
//   2. Call nam::profiling::reset() before benchmark
//   3. In hot path:
//        NAM_PROFILE_START();
//        // ... code ...
//        NAM_PROFILE_ADD(PROF_FOO);
//   4. Call nam::profiling::print_results() to display breakdown

#ifdef NAM_PROFILING

#include <cstdint>
#include <cstdio>

namespace nam {
namespace profiling {

// Capacity of the registry. register_type() never hands out an index outside
// [0, MAX_PROFILING_TYPES); raise this constant if more categories are needed.
constexpr int MAX_PROFILING_TYPES = 32;

// One timing category: a display name and the microseconds accumulated so far.
struct ProfilingEntry {
  const char* name;        // not owned; the pointer is stored as-is
  uint32_t accumulated_us; // 32-bit, so a single category wraps after 2^32 us
};

extern ProfilingEntry g_entries[MAX_PROFILING_TYPES];
extern int g_num_entries;

// Register a named profiling type. Returns index for fast accumulation.
// Called at static-init time or during setup, NOT in the hot path.
// The returned index is always a valid subscript into g_entries. Once the
// table is full, further registrations share the last slot (relabelled
// "(overflow)") instead of writing past the end of the array.
// A null `name` is stored as "(unnamed)".
int register_type(const char* name);

// Get current time in microseconds (platform-specific)
uint32_t get_time_us();

// Reset all profiling counters (names and indices are kept)
void reset();

// Print profiling results to stdout
void print_results();

// Helper macros for timing sections
// Usage:
//   NAM_PROFILE_START();
//   // ... code to profile ...
//   NAM_PROFILE_ADD(PROF_FOO); // Adds elapsed time to entry, resets timer
//
// NAM_PROFILE_START() declares a local named _prof_start, so it can appear
// only once per scope; the other macros require it to be in scope.

#define NAM_PROFILE_START() uint32_t _prof_start = nam::profiling::get_time_us()
#define NAM_PROFILE_ADD(idx)                                                    \
  do {                                                                          \
    uint32_t _prof_now = nam::profiling::get_time_us();                         \
    nam::profiling::g_entries[idx].accumulated_us += (_prof_now - _prof_start); \
    _prof_start = _prof_now;                                                    \
  } while (0)

// Variant that doesn't reset the timer (for one-shot measurements)
#define NAM_PROFILE_ADD_NORESTART(idx) \
  nam::profiling::g_entries[idx].accumulated_us += (nam::profiling::get_time_us() - _prof_start)

// Reset the timer without recording (for re-syncing mid-function)
#define NAM_PROFILE_RESTART() _prof_start = nam::profiling::get_time_us()

} // namespace profiling
} // namespace nam

#else // NAM_PROFILING not defined

// No-op macros when profiling is disabled
#define NAM_PROFILE_START() ((void)0)
#define NAM_PROFILE_ADD(idx) ((void)0)
#define NAM_PROFILE_ADD_NORESTART(idx) ((void)0)
#define NAM_PROFILE_RESTART() ((void)0)

namespace nam {
namespace profiling {
inline void reset() {}
inline void print_results() {}
} // namespace profiling
} // namespace nam

#endif // NAM_PROFILING

// =============================================================================
// NAM/profiling.cpp
// (as a separate file this section begins with: #include "profiling.h")
// =============================================================================
#ifdef NAM_PROFILING

namespace nam {
namespace profiling {

// Registry storage, shared by every backend. Both objects have constant
// initializers, so they are already zeroed when file-scope register_type()
// calls in other translation units run during dynamic initialization.
ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {};
int g_num_entries = 0;

} // namespace profiling
} // namespace nam

#if defined(__ARM_ARCH_7EM__) || defined(ARM_MATH_CM7)
// ARM Cortex-M7: Use DWT cycle counter for precise timing
#include "stm32h7xx.h"

namespace nam {
namespace profiling {

// CPU frequency in MHz (Daisy runs at 480 MHz)
static constexpr uint32_t CPU_FREQ_MHZ = 480;

// NOTE(review): presumably DWT->CYCCNT is a 32-bit free-running counter that
// has been enabled elsewhere -- confirm. If so, the value returned here wraps
// at 2^32 / 480 us (about 8.9 s) rather than at 2^32, and the unsigned
// subtraction in NAM_PROFILE_ADD mis-measures any section that straddles that
// wrap. Extend the counter to 64 bits if that matters for the target.
uint32_t get_time_us() {
  // DWT->CYCCNT gives cycle count
  // Divide by CPU_FREQ_MHZ to get microseconds
  return DWT->CYCCNT / CPU_FREQ_MHZ;
}

} // namespace profiling
} // namespace nam

#else
// Non-ARM: Use std::chrono for timing (for testing on desktop)
#include <chrono>

namespace nam {
namespace profiling {

// Microseconds elapsed since the first call, truncated to 32 bits.
// steady_clock is used because interval timing needs a monotonic clock;
// high_resolution_clock is allowed to alias the adjustable system clock.
uint32_t get_time_us() {
  using namespace std::chrono;
  static const auto start = steady_clock::now();
  const auto now = steady_clock::now();
  return static_cast<uint32_t>(duration_cast<microseconds>(now - start).count());
}

} // namespace profiling
} // namespace nam

#endif // ARM check

namespace nam {
namespace profiling {

// Integer percentage of `part` within `total` (truncated), or 0 when `total`
// is 0. The product is formed in 64 bits: in 32 bits `part * 100` wraps as
// soon as a category exceeds about 42.9 s.
static uint32_t percent_of(uint32_t part, uint64_t total) {
  if (total == 0)
    return 0;
  return static_cast<uint32_t>(static_cast<uint64_t>(part) * 100u / total);
}

int register_type(const char* name) {
  if (g_num_entries >= MAX_PROFILING_TYPES) {
    // Table full: keep the hot-path index in bounds by sharing the last slot,
    // and relabel it so the report shows that categories were merged.
    const int shared = MAX_PROFILING_TYPES - 1;
    g_entries[shared].name = "(overflow)";
    return shared;
  }

  const int idx = g_num_entries++;
  // print_results() passes the name to "%s", so never store a null pointer.
  g_entries[idx].name = (name != nullptr) ? name : "(unnamed)";
  g_entries[idx].accumulated_us = 0;
  return idx;
}

void reset() {
  for (int i = 0; i < g_num_entries; i++)
    g_entries[i].accumulated_us = 0;
}

void print_results() {
  // 64-bit sum: the per-category counters are 32-bit, their total need not be.
  uint64_t total = 0;
  for (int i = 0; i < g_num_entries; i++)
    total += g_entries[i].accumulated_us;

  std::printf("\nProfiling breakdown:\n");
  std::printf("%-12s %8s %6s\n", "Category", "Time(ms)", "%");
  std::printf("%-12s %8s %6s\n", "--------", "--------", "----");

  for (int i = 0; i < g_num_entries; i++) {
    const uint32_t us = g_entries[i].accumulated_us;
    if (us > 0) { // categories that never fired are omitted
      const uint32_t pct = percent_of(us, total);
      std::printf("%-12s %8.1f %5lu%%\n", g_entries[i].name, us / 1000.0, static_cast<unsigned long>(pct));
    }
  }

  std::printf("%-12s %8s %6s\n", "--------", "--------", "----");
  std::printf("%-12s %8.1f %5s\n", "Total", static_cast<double>(total) / 1000.0, "100%");
}

} // namespace profiling
} // namespace nam

#endif // NAM_PROFILING
nam::profiling::register_type("InputMixin"); +static int PROF_LAYER1X1 = nam::profiling::register_type("Layer1x1"); +static int PROF_HEAD1X1 = nam::profiling::register_type("Head1x1"); +static int PROF_RECHANNEL = nam::profiling::register_type("Rechannel"); +static int PROF_ACTIVATION = nam::profiling::register_type("Activation"); +static int PROF_COPIES = nam::profiling::register_type("Copies"); +#endif + // Layer ====================================================================== void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) @@ -89,6 +100,8 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) { + NAM_PROFILE_START(); + const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels // Step 1: input convolutions @@ -107,6 +120,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma Eigen::MatrixXf& conv_output = this->_conv.GetOutput(); this->_conv_post_film->Process_(conv_output, condition, num_frames); } + NAM_PROFILE_ADD(PROF_CONV1D); if (this->_input_mixin_pre_film) { @@ -123,8 +137,12 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma Eigen::MatrixXf& input_mixin_output = this->_input_mixin.GetOutput(); this->_input_mixin_post_film->Process_(input_mixin_output, condition, num_frames); } + NAM_PROFILE_ADD(PROF_INPUT_MIXIN); + this->_z.leftCols(num_frames).noalias() = _conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames); + NAM_PROFILE_ADD(PROF_COPIES); + if (this->_activation_pre_film) { this->_activation_pre_film->Process_(this->_z, condition, num_frames); @@ -139,6 +157,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma if (this->_gating_mode == GatingMode::NONE) { 
this->_activation->apply(this->_z.leftCols(num_frames)); + NAM_PROFILE_ADD(PROF_ACTIVATION); if (this->_activation_post_film) { this->_activation_post_film->Process_(this->_z, condition, num_frames); @@ -146,6 +165,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma if (this->_layer1x1) { this->_layer1x1->process_(this->_z, num_frames); + NAM_PROFILE_ADD(PROF_LAYER1X1); } } else if (this->_gating_mode == GatingMode::GATED) @@ -155,6 +175,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma auto input_block = this->_z.leftCols(num_frames); auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames); this->_gating_activation->apply(input_block, output_block); + NAM_PROFILE_ADD(PROF_ACTIVATION); if (this->_activation_post_film) { // Use Process() for blocks and copy result back @@ -165,6 +186,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma if (this->_layer1x1) { this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames); + NAM_PROFILE_ADD(PROF_LAYER1X1); } } else if (this->_gating_mode == GatingMode::BLENDED) @@ -174,6 +196,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma auto input_block = this->_z.leftCols(num_frames); auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames); this->_blending_activation->apply(input_block, output_block); + NAM_PROFILE_ADD(PROF_ACTIVATION); if (this->_activation_post_film) { // Use Process() for blocks and copy result back @@ -184,6 +207,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma if (this->_layer1x1) { this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames); + NAM_PROFILE_ADD(PROF_LAYER1X1); if (this->_layer1x1_post_film) { Eigen::MatrixXf& layer1x1_output = this->_layer1x1->GetOutput(); @@ -207,6 +231,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma Eigen::MatrixXf& 
head1x1_output = this->_head1x1->GetOutput(); this->_head1x1_post_film->Process_(head1x1_output, condition, num_frames); } + NAM_PROFILE_ADD(PROF_HEAD1X1); this->_output_head.leftCols(num_frames).noalias() = this->_head1x1->GetOutput().leftCols(num_frames); } else // No head 1x1 @@ -230,6 +255,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma // If layer1x1 is inactive, residual connection is just the input (identity) this->_output_next_layer.leftCols(num_frames).noalias() = input.leftCols(num_frames); } + NAM_PROFILE_ADD(PROF_COPIES); } // LayerArray ================================================================= @@ -298,9 +324,12 @@ void nam::wavenet::_LayerArray::Process(const Eigen::MatrixXf& layer_inputs, con void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames) { + NAM_PROFILE_START(); + // Process rechannel and get output this->_rechannel.process_(layer_inputs, num_frames); Eigen::MatrixXf& rechannel_output = _rechannel.GetOutput(); + NAM_PROFILE_ADD(PROF_RECHANNEL); // Process layers for (size_t i = 0; i < this->_layers.size(); i++) @@ -329,7 +358,9 @@ void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs this->_layers[last_layer].GetOutputNextLayer().leftCols(num_frames); // Process head rechannel + NAM_PROFILE_RESTART(); _head_rechannel.process_(this->_head_inputs, num_frames); + NAM_PROFILE_ADD(PROF_RECHANNEL); } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 8118e085..8f02f20e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -12,10 +12,13 @@ include_directories(tools ${NAM_DEPS_PATH}/nlohmann) add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES}) add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES}) +add_executable(memory_usage memory_usage.cpp) add_executable(run_tests run_tests.cpp test/allocation_tracking.cpp ${NAM_SOURCES}) # Compile run_tests without optimizations to 
ensure allocation tracking works correctly # Also ensure assertions are enabled (NDEBUG is not defined) so tests actually run set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0") +# Benchmodel should be built with NAM_PROFILING set +target_compile_definitions(benchmodel PRIVATE NAM_PROFILING) # Ensure assertions are enabled for run_tests by removing NDEBUG if it was set # Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG # We use a compile option to undefine it, which works on GCC, Clang, and MSVC @@ -32,6 +35,7 @@ endif() source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES}) target_compile_features(${TOOLS} PUBLIC cxx_std_20) +target_compile_features(memory_usage PUBLIC cxx_std_20) set_target_properties(${TOOLS} PROPERTIES @@ -61,4 +65,4 @@ endif() # /Users/steve/src/NeuralAmpModelerCore/Dependencies/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h # Don't let this break my build on debug: set_source_files_properties(../NAM/dsp.cpp PROPERTIES COMPILE_FLAGS "-Wno-error") -set_source_files_properties(../NAM/conv1d.cpp PROPERTIES COMPILE_FLAGS "-Wno-error") \ No newline at end of file +set_source_files_properties(../NAM/conv1d.cpp PROPERTIES COMPILE_FLAGS "-Wno-error") diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp index 39c14b0e..42556f59 100644 --- a/tools/benchmodel.cpp +++ b/tools/benchmodel.cpp @@ -4,6 +4,7 @@ #include "NAM/dsp.h" #include "NAM/get_dsp.h" +#include "NAM/profiling.h" using std::chrono::duration; using std::chrono::duration_cast; @@ -62,6 +63,9 @@ int main(int argc, char* argv[]) outputPtrs[ch] = outputBuffers[ch].data(); } + // Reset profiling counters before benchmark + nam::profiling::reset(); + std::cout << "Running benchmark\n"; auto t1 = high_resolution_clock::now(); for (size_t i = 0; i < numBuffers; i++) @@ -80,6 +84,9 @@ int main(int argc, char* argv[]) std::cout << ms_int.count() << "ms\n"; std::cout << ms_double.count() << "ms\n"; + + // Print profiling breakdown if 
enabled + nam::profiling::print_results(); } else { diff --git a/tools/memory_usage.cpp b/tools/memory_usage.cpp new file mode 100644 index 00000000..853ca8fe --- /dev/null +++ b/tools/memory_usage.cpp @@ -0,0 +1,611 @@ +// memory_usage.cpp — Report total memory required to host a NAM model at runtime. +// +// Usage: memory_usage [--buffer-size N] +// +// Parses the .nam JSON config and computes weight memory (learned parameters stored +// in Eigen matrices/vectors) and buffer memory (intermediate computation/state that +// depends on maxBufferSize) without instantiating the model. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "json.hpp" + +using json = nlohmann::json; + +static constexpr int DEFAULT_BUFFER_SIZE = 2048; +static constexpr long INPUT_BUFFER_SAFETY_FACTOR = 32; + +// ─── Result accumulator ───────────────────────────────────────────────────── + +struct MemoryResult +{ + size_t weight_bytes = 0; + size_t buffer_bytes = 0; + + void add_weights(size_t floats) { weight_bytes += floats * sizeof(float); } + void add_buffers(size_t floats) { buffer_bytes += floats * sizeof(float); } + + MemoryResult& operator+=(const MemoryResult& o) + { + weight_bytes += o.weight_bytes; + buffer_bytes += o.buffer_bytes; + return *this; + } +}; + +// ─── Conv1x1 ──────────────────────────────────────────────────────────────── + +// Conv1x1 stores either a full (out_channels x in_channels) matrix (possibly +// block-diagonal when grouped), or a depthwise weight vector when groups == +// in_channels == out_channels. 
+static MemoryResult conv1x1_memory(int in_ch, int out_ch, bool bias, int groups, int M) +{ + MemoryResult r; + bool depthwise = (groups == in_ch && in_ch == out_ch); + if (depthwise) + r.add_weights(in_ch); // _depthwise_weight(in_ch) + else + r.add_weights((size_t)out_ch * in_ch); // _weight(out_ch, in_ch) + if (bias) + r.add_weights(out_ch); // _bias(out_ch) + r.add_buffers((size_t)out_ch * M); // _output(out_ch, M) + return r; +} + +// ─── Conv1D ───────────────────────────────────────────────────────────────── + +// Conv1D stores kernel_size weight matrices (each out_ch x in_ch) or depthwise +// vectors, plus a bias vector, a ring buffer, and an output buffer. +static MemoryResult conv1d_memory(int in_ch, int out_ch, int kernel_size, bool bias, int dilation, int groups, int M) +{ + MemoryResult r; + bool depthwise = (groups == in_ch && in_ch == out_ch); + if (depthwise) + r.add_weights((size_t)kernel_size * in_ch); // _depthwise_weight[k](in_ch) + else + r.add_weights((size_t)kernel_size * out_ch * in_ch); // _weight[k](out_ch, in_ch) + if (bias) + r.add_weights(out_ch); // _bias(out_ch) + + // Ring buffer: storage = (in_ch, 2 * max_lookback + M) + // max_lookback = (kernel_size - 1) * dilation + long max_lookback = (kernel_size > 0) ? (long)(kernel_size - 1) * dilation : 0; + long ring_storage = 2 * max_lookback + M; + r.add_buffers((size_t)in_ch * ring_storage); // _input_buffer._storage + + // Output buffer: (out_ch, M) + r.add_buffers((size_t)out_ch * M); // _output + + return r; +} + +// ─── FiLM ─────────────────────────────────────────────────────────────────── + +struct FiLMParams +{ + bool active = false; + bool shift = true; + int groups = 1; +}; + +static MemoryResult film_memory(int condition_dim, int input_dim, const FiLMParams& fp, int M) +{ + if (!fp.active) + return {}; + MemoryResult r; + int scale_shift_dim = fp.shift ? 
2 * input_dim : input_dim; + // _cond_to_scale_shift is a Conv1x1(condition_dim -> scale_shift_dim, bias=true, groups) + r += conv1x1_memory(condition_dim, scale_shift_dim, true, fp.groups, M); + // _output(input_dim, M) + r.add_buffers((size_t)input_dim * M); + return r; +} + +// ─── BatchNorm ────────────────────────────────────────────────────────────── + +static MemoryResult batchnorm_memory(int dim) +{ + MemoryResult r; + // Stores scale(dim) + loc(dim) derived from running_mean, running_var, weight, bias, eps + // The source values are consumed from weights array; only scale + loc are stored at runtime. + r.add_weights(2 * (size_t)dim); + return r; +} + +// ─── LSTM ─────────────────────────────────────────────────────────────────── + +static MemoryResult lstm_memory(const json& config) +{ + MemoryResult r; + int num_layers = config["num_layers"]; + int input_size = config["input_size"]; + int hidden_size = config["hidden_size"]; + int in_channels = config.value("in_channels", 1); + int out_channels = config.value("out_channels", 1); + + for (int i = 0; i < num_layers; i++) + { + int cell_input = (i == 0) ? input_size : hidden_size; + // _w(4*H, I+H) + r.add_weights((size_t)4 * hidden_size * (cell_input + hidden_size)); + // _b(4*H) + r.add_weights(4 * (size_t)hidden_size); + // _xh(I+H) — stores initial hidden state in the hidden portion + r.add_weights((size_t)(cell_input + hidden_size)); + // _c(H) — initial cell state + r.add_weights((size_t)hidden_size); + + // Buffers: _ifgo(4*H) + r.add_buffers(4 * (size_t)hidden_size); + // Note: _xh and _c are also modified during inference but they are + // loaded from weights (initial state), so counted as weights above. 
+ } + + // _head_weight(out_channels, hidden_size) + r.add_weights((size_t)out_channels * hidden_size); + // _head_bias(out_channels) + r.add_weights(out_channels); + + // Top-level buffers: _input(input_size), _output(out_channels) + r.add_buffers(input_size); + r.add_buffers(out_channels); + + return r; +} + +// ─── Linear ───────────────────────────────────────────────────────────────── + +static MemoryResult linear_memory(const json& config) +{ + MemoryResult r; + int receptive_field = config["receptive_field"]; + bool bias = config["bias"]; + int in_channels = config.value("in_channels", 1); + int out_channels = config.value("out_channels", 1); + + // _weight(receptive_field) + r.add_weights(receptive_field); + // _bias (scalar float) + if (bias) + r.add_weights(1); + + // Buffer base: _input_buffers = in_channels vectors of (32 * receptive_field) + r.add_buffers((size_t)in_channels * INPUT_BUFFER_SAFETY_FACTOR * receptive_field); + // _output_buffers: resized per-call, not pre-allocated to a fixed size + // (depends on num_frames, not maxBufferSize) + + return r; +} + +// ─── ConvNet ──────────────────────────────────────────────────────────────── + +static MemoryResult convnet_memory(const json& config, int M) +{ + MemoryResult r; + int channels = config["channels"]; + std::vector dilations = config["dilations"]; + bool batchnorm = config["batchnorm"]; + int groups = config.value("groups", 1); + int in_channels = config.value("in_channels", 1); + int out_channels = config.value("out_channels", 1); + + int max_dilation = *std::max_element(dilations.begin(), dilations.end()); + + // Buffer base class: _input_buffers = in_channels * (32 * max_dilation) + int receptive_field = max_dilation; // passed to Buffer as receptive_field + r.add_buffers((size_t)in_channels * INPUT_BUFFER_SAFETY_FACTOR * receptive_field); + + // ConvNet blocks + for (size_t i = 0; i < dilations.size(); i++) + { + int block_in = (i == 0) ? 
in_channels : channels; + int block_out = channels; + // Conv1D with kernel_size=2, bias=!batchnorm + r += conv1d_memory(block_in, block_out, 2, !batchnorm, dilations[i], groups, M); + // Optional batchnorm + if (batchnorm) + r += batchnorm_memory(block_out); + // _output(out_channels, M) per block + r.add_buffers((size_t)block_out * M); + } + + // _block_vals: 1 entry of (channels, buffer_size) + // buffer_size = input_buffers[0].size() = 32 * receptive_field + long buffer_size = INPUT_BUFFER_SAFETY_FACTOR * receptive_field; + r.add_buffers((size_t)channels * buffer_size); + + // _head: weight(out_channels, channels) + bias(out_channels) + r.add_weights((size_t)out_channels * channels); + r.add_weights(out_channels); + + // _head_output is resized per-call, not a fixed pre-allocation + + return r; +} + +// ─── WaveNet helpers ──────────────────────────────────────────────────────── + +static FiLMParams parse_film_params(const json& layer_config, const std::string& key) +{ + FiLMParams fp; + if (layer_config.find(key) == layer_config.end() || layer_config[key] == false) + return fp; // inactive + const json& fc = layer_config[key]; + fp.active = fc.value("active", true); + fp.shift = fc.value("shift", true); + fp.groups = fc.value("groups", 1); + return fp; +} + +enum class GatingMode +{ + NONE, + GATED, + BLENDED +}; + +static std::vector parse_gating_modes(const json& layer_config, size_t num_layers) +{ + std::vector modes; + + auto parse_str = [](const std::string& s) -> GatingMode { + if (s == "gated") + return GatingMode::GATED; + if (s == "blended") + return GatingMode::BLENDED; + return GatingMode::NONE; + }; + + if (layer_config.find("gating_mode") != layer_config.end()) + { + if (layer_config["gating_mode"].is_array()) + { + for (const auto& gm : layer_config["gating_mode"]) + modes.push_back(parse_str(gm.get())); + } + else + { + GatingMode mode = parse_str(layer_config["gating_mode"].get()); + modes.resize(num_layers, mode); + } + } + else if 
(layer_config.find("gated") != layer_config.end())
+  {
+    bool gated = layer_config["gated"];
+    modes.resize(num_layers, gated ? GatingMode::GATED : GatingMode::NONE);
+  }
+  else
+  {
+    modes.resize(num_layers, GatingMode::NONE);
+  }
+  return modes;
+}
+
+// WaveNet _Layer memory
+//
+// Sums the estimates for one layer: the dilated convolution, the input mixin, the optional
+// layer1x1 / head1x1 convolutions, the per-layer work buffers and the FiLM modules.
+// M is the buffer size (frames per processing block) that the buffer estimates are scaled by.
+static MemoryResult wavenet_layer_memory(int condition_size, int channels, int bottleneck, int kernel_size, int dilation,
+                                         GatingMode gating_mode, int groups_input, int groups_input_mixin,
+                                         bool layer1x1_active, int layer1x1_groups, bool head1x1_active,
+                                         int head1x1_out_channels, int head1x1_groups, const FiLMParams& conv_pre_film,
+                                         const FiLMParams& conv_post_film, const FiLMParams& input_mixin_pre_film,
+                                         const FiLMParams& input_mixin_post_film,
+                                         const FiLMParams& activation_pre_film,
+                                         const FiLMParams& activation_post_film,
+                                         const FiLMParams& layer1x1_post_film, const FiLMParams& head1x1_post_film,
+                                         int M)
+{
+  MemoryResult r;
+  // Any gating mode other than NONE doubles the width of the convolution output.
+  bool gated = (gating_mode != GatingMode::NONE);
+  int conv_out = gated ? 2 * bottleneck : bottleneck;
+
+  // _conv: Conv1D(channels -> conv_out, kernel_size, bias=true, dilation, groups_input)
+  r += conv1d_memory(channels, conv_out, kernel_size, true, dilation, groups_input, M);
+
+  // _input_mixin: Conv1x1(condition_size -> conv_out, bias=false, groups_input_mixin)
+  r += conv1x1_memory(condition_size, conv_out, false, groups_input_mixin, M);
+
+  // _layer1x1 (optional): Conv1x1(bottleneck -> channels, bias=true, layer1x1_groups)
+  if (layer1x1_active)
+    r += conv1x1_memory(bottleneck, channels, true, layer1x1_groups, M);
+
+  // _head1x1 (optional): Conv1x1(bottleneck -> head1x1_out_channels, bias=true, head1x1_groups)
+  if (head1x1_active)
+    r += conv1x1_memory(bottleneck, head1x1_out_channels, true, head1x1_groups, M);
+
+  // Buffers: _z(conv_out, M)
+  r.add_buffers(static_cast<size_t>(conv_out) * M);
+  // _output_next_layer(channels, M)
+  r.add_buffers(static_cast<size_t>(channels) * M);
+  // _output_head: if head1x1 active -> (head1x1_out_channels, M), else (bottleneck, M)
+  int head_out = head1x1_active ? head1x1_out_channels : bottleneck;
+  r.add_buffers(static_cast<size_t>(head_out) * M);
+
+  // FiLM modules (up to 8)
+  r += film_memory(condition_size, channels, conv_pre_film, M);
+  r += film_memory(condition_size, conv_out, conv_post_film, M);
+  r += film_memory(condition_size, condition_size, input_mixin_pre_film, M);
+  r += film_memory(condition_size, conv_out, input_mixin_post_film, M);
+  r += film_memory(condition_size, conv_out, activation_pre_film, M);
+  r += film_memory(condition_size, bottleneck, activation_post_film, M);
+  if (layer1x1_active)
+    r += film_memory(condition_size, channels, layer1x1_post_film, M);
+  if (head1x1_active)
+    r += film_memory(condition_size, head1x1_out_channels, head1x1_post_film, M);
+
+  return r;
+}
+
+// WaveNet _LayerArray memory
+//
+// Reads one entry of the "layers" array and sums the rechannel convolution, every layer, the head
+// rechannel convolution and the array-level buffers.
+// Required keys are read with at(): it throws when a key is missing, whereas operator[] on a
+// const json has undefined behaviour for an absent key.
+static MemoryResult wavenet_layer_array_memory(const json& layer_config, int M)
+{
+  MemoryResult r;
+  int input_size = layer_config.at("input_size");
+  int condition_size = layer_config.at("condition_size");
+  int head_size = layer_config.at("head_size");
+  int channels = layer_config.at("channels");
+  int bottleneck = layer_config.value("bottleneck", channels);
+  int kernel_size = layer_config.at("kernel_size");
+  std::vector<int> dilations = layer_config.at("dilations");
+  size_t num_layers = dilations.size();
+  bool head_bias = layer_config.at("head_bias");
+
+  int groups_input = layer_config.value("groups_input", 1);
+  int groups_input_mixin = layer_config.value("groups_input_mixin", 1);
+
+  // layer1x1 params
+  bool layer1x1_active = true;
+  int layer1x1_groups = 1;
+  if (layer_config.find("layer1x1") != layer_config.end())
+  {
+    const json& layer1x1 = layer_config.at("layer1x1");
+    layer1x1_active = layer1x1.at("active");
+    layer1x1_groups = layer1x1.at("groups");
+  }
+
+  // head1x1 params
+  bool head1x1_active = false;
+  int head1x1_out_channels = channels;
+  int head1x1_groups = 1;
+  if (layer_config.find("head1x1") != layer_config.end())
+  {
+    const json& head1x1 = layer_config.at("head1x1");
+    head1x1_active = head1x1.at("active");
+    head1x1_out_channels = head1x1.at("out_channels");
+    head1x1_groups = head1x1.at("groups");
+  }
+
+  // Gating modes
+  std::vector<GatingMode> gating_modes = parse_gating_modes(layer_config, num_layers);
+
+  // FiLM params
+  FiLMParams conv_pre = parse_film_params(layer_config, "conv_pre_film");
+  FiLMParams conv_post = parse_film_params(layer_config, "conv_post_film");
+  FiLMParams input_mixin_pre = parse_film_params(layer_config, "input_mixin_pre_film");
+  FiLMParams input_mixin_post = parse_film_params(layer_config, "input_mixin_post_film");
+  FiLMParams activation_pre = parse_film_params(layer_config, "activation_pre_film");
+  FiLMParams activation_post = parse_film_params(layer_config, "activation_post_film");
+  FiLMParams layer1x1_post = parse_film_params(layer_config, "layer1x1_post_film");
+  FiLMParams head1x1_post = parse_film_params(layer_config, "head1x1_post_film");
+
+  // _rechannel: Conv1x1(input_size -> channels, bias=false)
+  r += conv1x1_memory(input_size, channels, false, 1, M);
+
+  // Per-layer
+  for (size_t i = 0; i < num_layers; i++)
+  {
+    r += wavenet_layer_memory(condition_size, channels, bottleneck, kernel_size, dilations[i], gating_modes[i],
+                              groups_input, groups_input_mixin, layer1x1_active, layer1x1_groups, head1x1_active,
+                              head1x1_out_channels, head1x1_groups, conv_pre, conv_post, input_mixin_pre,
+                              input_mixin_post, activation_pre, activation_post, layer1x1_post, head1x1_post, M);
+  }
+
+  // _head_rechannel: Conv1x1(head_output_size -> head_size, bias=head_bias)
+  int head_output_size = head1x1_active ? head1x1_out_channels : bottleneck;
+  r += conv1x1_memory(head_output_size, head_size, head_bias, 1, M);
+
+  // Buffers: _layer_outputs(channels, M)
+  r.add_buffers(static_cast<size_t>(channels) * M);
+  // _head_inputs(head_output_size, M)
+  r.add_buffers(static_cast<size_t>(head_output_size) * M);
+
+  return r;
+}
+
+// Forward declaration for recursive condition_dsp
+static MemoryResult compute_memory(const std::string& architecture, const json& config, int M);
+
+// WaveNet top-level memory
+//
+// Sums the optional nested condition DSP, the condition buffers, every layer array and the head
+// scale. Throws (via at() / the json conversions) when a required key is missing or has the wrong
+// type; main() reports that as an error.
+static MemoryResult wavenet_memory(const json& config, int M)
+{
+  MemoryResult r;
+  int in_channels = config.value("in_channels", 1);
+
+  // condition_dim = in_channels (from _get_condition_dim())
+  int condition_dim = in_channels;
+
+  // "layers" is required: fail with a catchable exception instead of indexing a missing key.
+  const json& layers = config.at("layers");
+
+  // Recursive condition_dsp.
+  // A key that is present but null means "no condition DSP", exactly like an absent key, so it
+  // must not be descended into.
+  const auto cdsp_it = config.find("condition_dsp");
+  const bool has_condition_dsp = cdsp_it != config.end() && !cdsp_it->is_null();
+  int condition_output_channels = condition_dim;
+  if (has_condition_dsp)
+  {
+    const json& cdsp = *cdsp_it;
+    const std::string cdsp_arch = cdsp.at("architecture");
+    // Pass the nested config by reference; no copy of the sub-tree is needed.
+    r += compute_memory(cdsp_arch, cdsp.at("config"), M);
+    // condition_output_channels comes from the condition_dsp's output
+    // For now, we use condition_size from first layer as a proxy
+    // (the actual model validates this match)
+    if (!layers.empty())
+      condition_output_channels = layers[0].at("condition_size");
+  }
+
+  // _condition_input(condition_dim, M)
+  r.add_buffers(static_cast<size_t>(condition_dim) * M);
+
+  // _condition_output
+  if (!has_condition_dsp)
+  {
+    // _condition_output(condition_dim, M)
+    r.add_buffers(static_cast<size_t>(condition_dim) * M);
+  }
+  else
+  {
+    // _condition_output(condition_output_channels, M)
+    r.add_buffers(static_cast<size_t>(condition_output_channels) * M);
+    // _condition_dsp_input_buffers: condition_dim vectors of M doubles/floats
+    // These are std::vector<std::vector<NAM_SAMPLE>> where NAM_SAMPLE is double
+    r.add_buffers(static_cast<size_t>(condition_dim) * M * (sizeof(double) / sizeof(float)));
+    // _condition_dsp_output_buffers: condition_output_channels vectors of M doubles
+    r.add_buffers(static_cast<size_t>(condition_output_channels) * M * (sizeof(double) / sizeof(float)));
+    // Pointer arrays are negligible
+  }
+
+  // Layer arrays
+  for (const auto& layer_config : layers)
+    r += wavenet_layer_array_memory(layer_config, M);
+
+  // _head_scale (1 float) — it's a weight
+  r.add_weights(1);
+
+  return r;
+}
+
+// ─── Dispatch ───────────────────────────────────────────────────────────────
+
+// Routes to the estimator for the named architecture; throws std::runtime_error for an unknown name.
+static MemoryResult compute_memory(const std::string& architecture, const json& config, int M)
+{
+  if (architecture == "WaveNet")
+    return wavenet_memory(config, M);
+  if (architecture == "LSTM")
+    return lstm_memory(config);
+  if (architecture == "ConvNet")
+    return convnet_memory(config, M);
+  if (architecture == "Linear")
+    return linear_memory(config);
+  throw std::runtime_error("Unknown architecture: " + architecture);
+}
+
+// ─── Formatting helpers ─────────────────────────────────────────────────────
+
+// Formats a byte count as "N bytes", "X.XX KB" or "X.XX MB" (1024-based units).
+static std::string format_bytes(size_t bytes)
+{
+  char buf[64];
+  if (bytes < 1024)
+    snprintf(buf, sizeof(buf), "%zu bytes", bytes);
+  else if (bytes < 1024 * 1024)
+    snprintf(buf, sizeof(buf), "%.2f KB", bytes / 1024.0);
+  else
+    snprintf(buf, sizeof(buf), "%.2f MB", bytes / (1024.0 * 1024.0));
+  return buf;
+}
+
+// Formats n in decimal with a comma between each group of three digits, e.g. 1234567 -> "1,234,567".
+static std::string format_with_commas(size_t n)
+{
+  std::string s = std::to_string(n);
+  // Walk from the right, three digits at a time; stop before reaching the first digit.
+  int insert_pos = (int)s.length() - 3;
+  while (insert_pos > 0)
+  {
+    s.insert(insert_pos, ",");
+    insert_pos -= 3;
+  }
+  return s;
+}
+
+// ─── Main ───────────────────────────────────────────────────────────────────
+
+// Usage: memory_usage <model.nam> [--buffer-size N]
+// Prints the estimated weight and buffer memory for the model. Returns 0 on success and 1 on any
+// usage, I/O, parse or model-structure error.
+int main(int argc, char* argv[])
+{
+  if (argc < 2)
+  {
+    fprintf(stderr, "Usage: memory_usage <model.nam> [--buffer-size N]\n");
+    return 1;
+  }
+
+  const char* model_path = argv[1];
+  int buffer_size = DEFAULT_BUFFER_SIZE;
+
+  for (int i = 2; i < argc; i++)
+  {
+    if (strcmp(argv[i], "--buffer-size") != 0)
+    {
+      fprintf(stderr, "Unknown option: %s\n", argv[i]);
+      return 1;
+    }
+    if (i + 1 >= argc)
+    {
+      // A recognised option with its value missing is not an "unknown option".
+      fprintf(stderr, "Error: --buffer-size requires a value\n");
+      return 1;
+    }
+
+    // strtol, unlike atoi, exposes trailing garbage and has defined behaviour on overflow.
+    char* end = nullptr;
+    const long parsed = strtol(argv[++i], &end, 10);
+    buffer_size = static_cast<int>(parsed);
+    // The last comparison rejects values that do not survive the narrowing to int.
+    if (end == argv[i] || *end != '\0' || parsed <= 0 || buffer_size != parsed)
+    {
+      fprintf(stderr, "Error: buffer size must be a positive integer\n");
+      return 1;
+    }
+  }
+
+  // Read and parse JSON
+  std::ifstream file(model_path);
+  if (!file.is_open())
+  {
+    fprintf(stderr, "Error: cannot open %s\n", model_path);
+    return 1;
+  }
+
+  json j;
+  try
+  {
+    file >> j;
+  }
+  catch (const std::exception& e)
+  {
+    fprintf(stderr, "Error parsing JSON: %s\n", e.what());
+    return 1;
+  }
+
+  try
+  {
+    // Every access that can throw on a malformed model (missing key, wrong type) lives inside this
+    // try block so the failure is reported instead of terminating the process.
+    const std::string architecture = j.at("architecture");
+    const json& config = j.at("config");
+
+    // Cross-check: count weights in JSON
+    size_t json_weight_count = 0;
+    const auto weights_it = j.find("weights");
+    if (weights_it != j.end())
+      json_weight_count = weights_it->size();
+
+    // The sample rate is informational only: ignore it unless it is actually a number.
+    double sample_rate = -1.0;
+    const auto sample_rate_it = j.find("sample_rate");
+    if (sample_rate_it != j.end() && sample_rate_it->is_number())
+      sample_rate = sample_rate_it->get<double>();
+
+    MemoryResult result = compute_memory(architecture, config, buffer_size);
+    size_t total = result.weight_bytes + result.buffer_bytes;
+
+    printf("Model: %s\n", model_path);
+    printf("Architecture: %s\n", architecture.c_str());
+    if (sample_rate > 0)
+      printf("Sample rate: %.0f Hz\n", sample_rate);
+    printf("\n");
+    printf("Weights: %s bytes (%s)\n", format_with_commas(result.weight_bytes).c_str(),
+           format_bytes(result.weight_bytes).c_str());
+    printf("Buffers: %s bytes (%s) [buffer size: %d]\n", format_with_commas(result.buffer_bytes).c_str(),
+           format_bytes(result.buffer_bytes).c_str(), buffer_size);
+    printf("Total: %s bytes (%s)\n", format_with_commas(total).c_str(), format_bytes(total).c_str());
+
+    if (json_weight_count > 0)
+    {
+      printf("\nJSON weights: %zu values (%s bytes)\n", json_weight_count,
+             format_with_commas(json_weight_count * sizeof(float)).c_str());
+    }
+  }
+  catch (const std::exception& e)
+  {
+    fprintf(stderr, "Error computing memory: %s\n", e.what());
+    return 1;
+  }
+
+  return 0;
+}
From c4b84f50b30b2573d4ec74a4dc408f2ab80c8678 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Fri, 20 Feb 2026 17:41:55
-0800 Subject: [PATCH 2/4] Add render tool for processing WAV files through .nam models - Add AudioDSPTools as submodule for WAV input (dsp::wav::Load) - Add tools/render: loads model, reads input WAV, processes, writes 32-bit float output - Usage: render [output.wav] - Supports mono input; validates sample rate matches model Co-authored-by: Cursor --- .gitmodules | 3 + Dependencies/AudioDSPTools | 1 + tools/CMakeLists.txt | 30 ++++++- tools/render.cpp | 159 +++++++++++++++++++++++++++++++++++++ 4 files changed, 192 insertions(+), 1 deletion(-) create mode 160000 Dependencies/AudioDSPTools create mode 100644 tools/render.cpp diff --git a/.gitmodules b/.gitmodules index 11c19841..f49ce6e8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "Dependencies/eigen"] path = Dependencies/eigen url = https://gitlab.com/libeigen/eigen +[submodule "Dependencies/AudioDSPTools"] + path = Dependencies/AudioDSPTools + url = https://github.com/sdatkinson/AudioDSPTools.git diff --git a/Dependencies/AudioDSPTools b/Dependencies/AudioDSPTools new file mode 160000 index 00000000..0827c6c2 --- /dev/null +++ b/Dependencies/AudioDSPTools @@ -0,0 +1 @@ +Subproject commit 0827c6c2fc0deced568536142ea86f189e0b98a1 diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 8f02f20e..bbe93f3e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -4,14 +4,42 @@ file(GLOB_RECURSE NAM_SOURCES ../NAM/*.cpp ../NAM/*.c ../NAM*.h) set(TOOLS benchmodel) add_custom_target(tools ALL - DEPENDS ${TOOLS}) + DEPENDS ${TOOLS} render) + +set(AUDIO_DSP_TOOLS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../Dependencies/AudioDSPTools") +set(AUDIO_DSP_TOOLS_WAV_SOURCES "${AUDIO_DSP_TOOLS_DIR}/dsp/wav.cpp") include_directories(tools ..) 
include_directories(tools ${NAM_DEPS_PATH}/eigen) include_directories(tools ${NAM_DEPS_PATH}/nlohmann) +include_directories(tools ${AUDIO_DSP_TOOLS_DIR}/dsp) add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES}) add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES}) +add_executable(render render.cpp ${NAM_SOURCES} ${AUDIO_DSP_TOOLS_WAV_SOURCES}) +target_compile_features(render PUBLIC cxx_std_20) +# AudioDSPTools wav.cpp has sign-compare issues; don't fail build +set_source_files_properties(${AUDIO_DSP_TOOLS_WAV_SOURCES} PROPERTIES COMPILE_FLAGS "-Wno-error") +set_target_properties(render PROPERTIES + CXX_VISIBILITY_PRESET hidden + INTERPROCEDURAL_OPTIMIZATION TRUE + PREFIX "" +) +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_compile_definitions(render PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) +endif() +if (MSVC) + target_compile_options(render PRIVATE + "$<$:/W4>" + "$<$:/O2>" + ) +else() + target_compile_options(render PRIVATE + -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter + "$<$:-Og;-ggdb;-Werror>" + "$<$:-Ofast>" + ) +endif() add_executable(memory_usage memory_usage.cpp) add_executable(run_tests run_tests.cpp test/allocation_tracking.cpp ${NAM_SOURCES}) # Compile run_tests without optimizations to ensure allocation tracking works correctly diff --git a/tools/render.cpp b/tools/render.cpp new file mode 100644 index 00000000..77836b41 --- /dev/null +++ b/tools/render.cpp @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NAM/dsp.h" +#include "NAM/get_dsp.h" +#include "wav.h" + +namespace +{ +// Write mono 32-bit float WAV file (IEEE float format 3). 
+bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSamples, double sampleRate)
+{
+  // RIFF stores both the file size and the data size in 32-bit fields. Refuse anything that would
+  // not fit rather than letting the sizes wrap and writing a header that disagrees with the data.
+  // Checked before the file is opened so a failed call does not leave an empty file behind.
+  constexpr size_t kHeaderBytes = 36; // bytes counted by the RIFF size field besides the sample data
+  constexpr size_t kMaxDataBytes = 0xFFFFFFFFu - kHeaderBytes;
+  if (numSamples > kMaxDataBytes / sizeof(float))
+  {
+    std::cerr << "Error: Output is too large for a WAV file (" << numSamples << " samples)\n";
+    return false;
+  }
+
+  std::ofstream out(fileName, std::ios::binary);
+  if (!out.is_open())
+  {
+    std::cerr << "Error: Failed to open output file " << fileName << "\n";
+    return false;
+  }
+
+  const uint32_t dataSize = static_cast<uint32_t>(numSamples * sizeof(float));
+  const uint32_t chunkSize = static_cast<uint32_t>(kHeaderBytes) + dataSize;
+
+  // NOTE: the fields below are written in host byte order, as in the original code.
+
+  // RIFF header
+  out.write("RIFF", 4);
+  out.write(reinterpret_cast<const char*>(&chunkSize), 4);
+  out.write("WAVE", 4);
+
+  // fmt chunk (16 bytes for PCM/IEEE)
+  const uint32_t fmtSize = 16;
+  out.write("fmt ", 4);
+  out.write(reinterpret_cast<const char*>(&fmtSize), 4);
+  const uint16_t audioFormat = 3; // IEEE float
+  out.write(reinterpret_cast<const char*>(&audioFormat), 2);
+  const uint16_t numChannels = 1;
+  out.write(reinterpret_cast<const char*>(&numChannels), 2);
+  const uint32_t sr = static_cast<uint32_t>(sampleRate);
+  out.write(reinterpret_cast<const char*>(&sr), 4);
+  const uint32_t byteRate = sr * sizeof(float);
+  out.write(reinterpret_cast<const char*>(&byteRate), 4);
+  const uint16_t blockAlign = sizeof(float);
+  out.write(reinterpret_cast<const char*>(&blockAlign), 2);
+  const uint16_t bitsPerSample = 32;
+  out.write(reinterpret_cast<const char*>(&bitsPerSample), 2);
+
+  // data chunk
+  out.write("data", 4);
+  out.write(reinterpret_cast<const char*>(&dataSize), 4);
+  out.write(reinterpret_cast<const char*>(samples), dataSize);
+
+  return out.good();
+}
+
+} // namespace
+
+// Usage: render <model.nam> <input.wav> [output.wav]
+// Loads the model, runs the mono input through it in fixed-size blocks and writes the first output
+// channel as a 32-bit float WAV ("output.wav" unless a third argument is given).
+// Returns 0 on success and 1 on any error.
+int main(int argc, char* argv[])
+{
+  if (argc < 3 || argc > 4)
+  {
+    std::cerr << "Usage: render <model.nam> <input.wav> [output.wav]\n";
+    return 1;
+  }
+
+  const char* modelPath = argv[1];
+  const char* inputPath = argv[2];
+  const char* outputPath = (argc >= 4) ? argv[3] : "output.wav";
+
+  std::cerr << "Loading model [" << modelPath << "]\n";
+  auto model = nam::get_dsp(std::filesystem::path(modelPath));
+  if (!model)
+  {
+    std::cerr << "Failed to load model\n";
+    return 1;
+  }
+  std::cerr << "Model loaded successfully\n";
+
+  // NOTE(review): element type assumed to be float, as taken by dsp::wav::Load — TODO confirm.
+  std::vector<float> inputAudio;
+  double inputSampleRate = 0.0;
+  auto loadResult = dsp::wav::Load(inputPath, inputAudio, inputSampleRate);
+  if (loadResult != dsp::wav::LoadReturnCode::SUCCESS)
+  {
+    std::cerr << "Failed to load input WAV: " << dsp::wav::GetMsgForLoadReturnCode(loadResult) << "\n";
+    return 1;
+  }
+
+  // A non-positive expected rate is treated as "unspecified": the input's own rate is used.
+  const double expectedRate = model->GetExpectedSampleRate();
+  if (expectedRate > 0 && std::abs(inputSampleRate - expectedRate) > 0.5)
+  {
+    std::cerr << "Error: Input WAV sample rate (" << inputSampleRate
+              << " Hz) does not match model expected rate (" << expectedRate << " Hz)\n";
+    return 1;
+  }
+
+  const double sampleRate = expectedRate > 0 ? expectedRate : inputSampleRate;
+  const int bufferSize = 64;
+  model->Reset(sampleRate, bufferSize);
+
+  const int inChannels = model->NumInputChannels();
+  const int outChannels = model->NumOutputChannels();
+
+  if (inChannels != 1)
+  {
+    std::cerr << "Error: render tool currently supports mono input only (model has " << inChannels
+              << " input channels)\n";
+    return 1;
+  }
+  // Channel 0 of the output is read unconditionally below, so it has to exist.
+  if (outChannels < 1)
+  {
+    std::cerr << "Error: model reports no output channels\n";
+    return 1;
+  }
+  if (outChannels > 1)
+    std::cerr << "Warning: model has " << outChannels << " output channels; only the first is written\n";
+
+  // NOTE(review): buffer element type assumed to be NAM_SAMPLE, the sample type of process() — TODO confirm.
+  std::vector<std::vector<NAM_SAMPLE>> inputBuffers(inChannels);
+  std::vector<std::vector<NAM_SAMPLE>> outputBuffers(outChannels);
+  std::vector<NAM_SAMPLE*> inputPtrs(inChannels);
+  std::vector<NAM_SAMPLE*> outputPtrs(outChannels);
+
+  for (int ch = 0; ch < inChannels; ch++)
+  {
+    inputBuffers[ch].resize(bufferSize, 0.0);
+    inputPtrs[ch] = inputBuffers[ch].data();
+  }
+  for (int ch = 0; ch < outChannels; ch++)
+  {
+    outputBuffers[ch].resize(bufferSize, 0.0);
+    outputPtrs[ch] = outputBuffers[ch].data();
+  }
+
+  // Exactly one output sample is stored per input sample (first channel only).
+  std::vector<float> outputAudio;
+  outputAudio.reserve(inputAudio.size());
+
+  size_t readPos = 0;
+  const size_t totalSamples = inputAudio.size();
+
+  while (readPos < totalSamples)
+  {
+    const size_t toRead = std::min(static_cast<size_t>(bufferSize), totalSamples - readPos);
+
+    for (size_t i = 0; i < toRead; i++)
+      inputBuffers[0][i] = static_cast<NAM_SAMPLE>(inputAudio[readPos + i]);
+    // Zero the unused tail of a short final block.
+    for (size_t i = toRead; i < static_cast<size_t>(bufferSize); i++)
+      inputBuffers[0][i] = 0;
+
+    model->process(inputPtrs.data(), outputPtrs.data(), static_cast<int>(toRead));
+
+    for (size_t i = 0; i < toRead; i++)
+      outputAudio.push_back(static_cast<float>(outputBuffers[0][i]));
+
+    readPos += toRead;
+  }
+
+  if (!SaveWavFloat32(outputPath, outputAudio.data(), outputAudio.size(), sampleRate))
+  {
+    return 1;
+  }
+
+  std::cerr << "Wrote " << outputAudio.size() << " samples to " << outputPath << "\n";
+  return 0;
+}
From 4ec410e5cbbac54b123ddddd36bc621af9b189bd Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Fri, 20 Feb 2026 17:42:37 -0800 Subject: [PATCH 3/4] Add use of render tool in tests --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 62f2a7e2..929263b1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,3 +41,4 @@ jobs: ./build/tools/run_tests ./build/tools/benchmodel ./example_models/wavenet.nam ./build/tools/benchmodel ./example_models/lstm.nam + ./build/tools/render ./example_models/wavenet.nam ./example_models/wavenet.nam From c001c71ec505c9d13e0ccf3e6559e9b889ff7ddf Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Fri, 20 Feb 2026 17:48:16 -0800 Subject: [PATCH 4/4] Example input audio, fix render tool test --- .github/workflows/build.yml | 2 +- .gitignore | 2 ++ example_audio/input.wav | Bin 0 -> 288044 bytes 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 example_audio/input.wav diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 929263b1..83c452fb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,4 +41,4 @@ jobs: ./build/tools/run_tests ./build/tools/benchmodel ./example_models/wavenet.nam
./build/tools/benchmodel ./example_models/lstm.nam - ./build/tools/render ./example_models/wavenet.nam ./example_models/wavenet.nam + ./build/tools/render ./example_models/wavenet.nam ./example_audio/input.wav ./example_audio/output.wav diff --git a/.gitignore b/.gitignore index b7ee58e6..34ee36ee 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ docs/_build/ *.DS_Store + +example_audio/output.wav diff --git a/example_audio/input.wav b/example_audio/input.wav new file mode 100644 index 0000000000000000000000000000000000000000..fd0302bd119965cb185437a0fc42dfda94193cd4 GIT binary patch literal 288044 zcmeI5`FBqD`oEuqm_x*zMk0|&A~Mg@v+F2I)mZb;(gCH$qN?Vqd9G7VOIt-zV_Q{2 zbw8Pjh)5#Fgv3llVh)kd|IayMvCIjzw^Xw!DfJjLX&qK}OzWW7kf@l53=v z8Woks`d7x!Wk%C{qjHb&!E~cbq>*~vdS{_E&DHw(mV|ol2|Jd>k1dIt+CJ{yqS(ht zF^9{eM>UBKY8PdViaZ$^;piV8-#Bbdc}Vxv;Jqt@o^}r$Q$3*PKL66*ZCBs({d%TP z;~H(&eB;%l%5&ockDLqc9j$KNzIT1I++{|G7K2tci$2?Aa`VPx`Z>>BNF?vc>EH)qX!dX8#HA zqZ!Y#Pi`;IeAY5+d!N%^tj+ekd}c&&?)_zXv&zpVe{k;e{PWg+7f)Zhbnepvi{;9r z@2?KA78l&O=CSU^iMMafu)A~a*xk8vN}6{njc-u4^kVteosWFJda|d-vwW`#>x&n& z&s0Y3tg_6r`(j|V*kFfZhnkKTYW3SyXUJFe4tJ^Fu(4C+g@*2%8Z{f*Ss3Z zJ=5ihkLyn-TR#2NeY3qsx3!*|T)jGN@-A!U({r(JqjJB}Uj7@lw@ZH<5D^pf{iNVS zKZm@V8rtGYSn>Vv3FQ%s??gU78@2Law9B%Xaqq=`;1rksZ`_;_@gtwb51pIvR%L?y zSnKRe>vj)g^eE%N&&I4wUya?oh?wK1cEG5WFf*#>Juh_%O& zgpSb(@!RA3)Q$gbc-+X}V)x#P+0`Oui8Xp!|EMh=MRxooV%3oFW1Yil`G)SU48EHl zRDDTceEWd&_x(NBwjCbmS7iD;?AoU2q?di?)>{sFxOulqUhGzS)AgzGM&z0nwF;Z1 zw`rO?y7AKO&hHdEO?7E7w!7oX*>&6RseS!I%>suStAeTz8(_bCj-_b(t6|wM7gki* zyq-_!`Sj_`$A@-5Y<{I|T9XIS-R~b-a&KtHojFdo=MB1P?zq10Rq>F4MdJ=%@$xLp zU3EFP#-*uWU#MT_{N3;JhXkJ8otf)DHOJNW%<Y3X@7r{+UcK^P)o|z9?40+CGBsVw0ue86vxD`H`=meZ49vrBh32| z=9H%9^6F-+#hh8&?Ayvb`j)wKs@e1}v&`P+Kh(BA*)}dFvGb9{)?Jf|t|xuCB6(hi z6c>k7yPVXzyVL5fO+WSZanE@t&Me8;vgzcoe=<{Uochk?^tS`Dr~h=uQl4|QTVBq- zvx{4vJGkb2eW#1@>n@dg6r4FwxT@#XGi61_hHEoB-3WPpvufY%C7<1$6j(Cw;r+rR z4>m6>Z{P3HAfG2oE1r(edcI}ril4)(7T;9IZ3 z6UXOfgJp{ve%!55n|e*gr!_5_+1xY8CFjB$|C`g&t(AL?L#7-ZVy37-dz)ou@|k 
zBBS-c#`qORLQms=)s6rDW__!p^>B7V!(Is=CdFs@#!s0WH$5}f-Z3`%&6rLjqX*B6 z+P*q+*t&=V%fqjI8Rj)K^w02+Y`frN$$|gO3}_V4u69n_o8$eSSMy!IvQ3$@_tX`w z$J=`bjc?_c=k|Vh%UR3cNWRiyX<+lSqnQZg#w#Q`ezd9lPLK z?FZNJSx~*%9{VY0Ex*2c)xf8+Vb2%NUp}9@_32M%9+%ra$_^{vIO@TnpYDHs>D~vf zcYpifcJhv!HJ@G2e&^cEtwkR>TrC(=*x~%;Tkl@-IC0_4?&tq|A%D{NvmTClC4c02 zesspcE&I~dtUG&8O<0tDW}FI|B#*( z7L@eI=ET_6iADQtsqfgHJv3MBFxQPWT@y_IH_VJ0rp5C5pZq(&!msmR`5AtapXYb+ zd-()Dhfm`(`D8wyZ{fT6M!uJC=NLE=j)$Y-*f>ItlcVLBIdYDlv*27fBhHJn;~Y6t z&X=?1+#vu5Xut$A@PQJnAO<(+!4Q)0geq(y3}vGcS>dd6Ry%8+mCyQT z7qA!D5$p?g2YZB_!hT`buy@!&>?3v)dy1XK{$iK0*Vu9FJ9Zy?ke$eWWLL5`*`e%H zb}M_9oy-1Z7qgey(d=t>H+!6&&VFatv-hb0)B&miHG#@NeV|HEE2tRMjn~!VKp~Zc zdO}s9woqZHGgKRD4wZ-cLlvSHQIV)iR3~Z_m5O>r)uMJ$!KhDdJvt6eneNIH_@T!Q*rQ9-(DmqjbU^wb-H@J0XQV&UCFzxPO!_9>lO9SZrJvGO>8*5F`Yhd+o=fMY|I&r& z#dKu)GToUTO{b<`)3xc{ba47O-JG6IXQ#i@<>~cweEL4!pA0|}AP^kUGd7BoJ~4X@pEdG9jOkQphSK7IF*eg$zTIA*S%tP`a|B!;nLL?$`5$TAGL{cIzk($U(Bq(weX^KomvLauRvdCH_E^-&? ziws5*Bae~F$Yvz8*Ex-}mVJ`sMt&p3k>yBqlY2?OWMGmod6-m8HYOpHlS#{DW|A}cnG{WyCQ*~CN!Mg- zk~Vpp)J^s#fs?~Y<79G@Ir*HFPF5$eliNw}WO$N1d7e~HwkP3}^GW+;ev&`=pDBP@ zfQf*)fa!o4fk}aRfvJJnfeC^+f@y-8g2{sUf+>SpgNcK=gXx1Agh_;XgsFttgb9T? 
zg=vMEg~^5ag(-$vhKYu`hUtbGhe?NdhpC6zhY5%|h-rwKh{=fgh$)F#iHV81iRp(Ss zjwz2>kBN`D@AdSzw;GcmGo#h_RbkJUUB0X+-15vJ`-xlCqh&!4{~S^Fczx;bXG*#{ z-E;4KXZiYD4~uW?3BTTAY4OUNS4VZdQu#;0FJ71T{%~=M>xE8R&P@u>e>*eJd3>&` zOU~X?*$tMTp5Hg?9p6*i9WwJvGMZdHv7zwz_qWr#SEb!(mA0yT>hwh^Cr>5s^GNP8 zH|cOmV(XEKn@Vh*=h`AXZ8J`r{>#kw`kIz@W~`H0!`>WaXTD$GyyI(D>t+7*jd}36 z8S7$e`h_i~*tTqN;-f2x{U;}V>zusgSaRcODRtvhJ2|GUzmt}olb)Dg}INu-S(v8m(N!AsHorO<*>?1 z*SxBtU3Tw&U9Dn}!~Ecy-&C*t$aE6XHK|Nbp~taJ8m&%oOV% zdDgAIM!-13*k)v&HnK_#qtbAxGKM`g8eA|!{xD9>G9JVm+e)letE^5w*3!KRfB7fO z`Ct5JFXJ}#ii`SR?E7b9-mDQ*qiu9y`>4uyBiD70xY!}QR$$lw$Ix0u!2@;$EgBnG z;2E$f%ino&+j)+D@jv=3^KFy3!z;LTYu|Mq7E7yoAGvL@xq7yEqwv!fkq4VCc-6$W zedF1)oo^j-N`6p(ikIWyzID&fuiblJO}~o{iPfvm4X{7n*J7FVYSFfrsaX{*E1o^{ ze!8>I<1O1tm51KkGw*txbHyKo71dsMWmsjwpJOkN zD86`j_=N_y&h49;U*dSS;hx-){d1!0oXJQ%{qgdw&cjaq5tDh-{bb9A8LJ#mOlxwy zreFHRx6^J+PF?d`%1@=q(QhWt*_gDUX3{US5|7#^zFKRW8)_R+V1Dv%mX^Xd~5BdK1kqsv%{F3pXzfV5P&+KEL)gxbQ?E8`UR>L}cH*-e&syDzK7YI2fA3D|Uvj{;bjkGx z-|jD;IOoy7y`H?`{mirC`GnIiHvd}LXLglaf4j|r)uuZ*7#C}-+FPskygHqG*SqCj zfAO6L>H8aYp3>--@FsIgo6g$W{LOwYW9(eFY-_ouqx+@Ht=f(EZ1=qNk{RCRkJ~gE z<+~-@uTQ9d-r{!G@&lq=gBJG--tbL`_pZ>=)UYFG!+Tzgn35a$)E4#2FVSyLk2(CG z*i%)pgZITHycgf&Zv5ma2}{cpz8YZ-O0lkVF&2+7>TfihPZ&#Y8ev6GeU5o~m*1XNu9&y$+sR@VPO1OL=-pwU`_)2VA3ugG@(wO&Jyw;Bxl!tvi7b+?_XJ;kBc z>fjnf233!nZ@>CCOLESu$d@nQ@qRI?$Mdo;pLW^)I4A#MS?%(g#)HqN-T(fNdlw$w zSsi`bZQjj-)7JyMuYEGRsHpHtMW@2*2QK$(d#S^(7u?#Nk2;ipx%=6nCAs$Na+-8H zbJ>!;DJ5&;x2IgjW%lWPazXoy+i@ojwmN!L7* z*8DfIOY=nUUu_8;Y`@+!KmWx%Fw&eFVP%-ko=^jvyvBvro1XoN&h$X@cOi^Gt&o*JHGkT6YosQa9wcn z(&o%=8K;6?W&I~EyTiOQC1-M;w##d=>TJ!I`I9G{Z%}e!$;3;mD=%lSDty@X>dzTP z^Cn%3cfTtT)k z(^{*ZF{z(1X0358(a5`IL_9YvmN)q)|IV-Q>-<-KhM(l;`Ca^8K7r5S)A&q2na}52 z_%6PY@8#P$29AW|;ixz^j*#QzXgOw%oa5&#I2X={^Wy9{N6wV<<*Ye(2*3dvFo6tw zpad(3!3}yagd{wn3R?)n8QL(1Jp8!|TnnxU*M+OYHR4Kfy|`*zJFXztk*moy<;rq> zxyoE?t~l47tIrx>C9ob?6|4x$LI8e^rg-dJ_4Jysy= zkk!bVWM#5GS*5I1RxIn5)yo=YC9|Gc)vRq+IP09%&YEZCv;Nry>;-lN`-0uU9$}}j 
zU)VM59d;1=h~31VVrQ|x*k$ZBb{zYT-NzneC$b;emF!J+DEpM%%ARHCvVYmd>}7T| z`!wNYo{&6E%uTMZKbGQM;&M)G?|VHI2$feWS`z>!^6tJ*poykV;5Bq$*My zsgTr3swFj(%1QmCic(9dsMJ-eD>ar%OTDG)QhTYu)M2VIHJQpxeWprNtEt%3ZK^jl zoJvkTr>axisqoZ!sy#KI%1`~L3(yPb2=oQI13iLHLBF7D&^zcL^bxuVJ%!Fff1%6J zYv?%i9l8%ah)zU5qASsx=uq@2x)nW(&PD&Ci_y#IX!JF@8$IrIr!&sc_2_+cK>8rv zke*0qq(9On>6LU$`X=3z9!e*rpVC$7t#nxWEZvr#OXsEk(uL{8bY%K6-I*Rur>0-i zwdvh-aQZmioSsf+r@zzX>GgDc`aa#C3_ubf50DDT1|$S>0%?KFKyn~IkRr$uBnol` z>4J;U*5ON44=O(QX(&rn#fKhC~_2OicCeaB43fR$XX;Wau?}~3`P1vG9(|863L1rMsg$Rkqk+a zBu|nm$(AHcawch$%t`Vjf09DUq9jssDe07qN>U}Sl3K~GBv^7RY4&xdZ4^kpCFQ=( zx;uTzy`*0$>F4N zGC9edd`?OytCQHt?WA`yJV~BBPpT)|lkmy;qONM8I6Ybij7^G{H>4WWjvFl)Ll*FvW#Khdh^u!FsB*i?%RK;w?gvFf2 zw8hNDc#K+wCdiv}6 zl1Y$xQ1=IQe^B=ab$?Lz2X%i?_Xl-iI!EKd9#i^?p&kUsUfG z)%!*Deo?(&RPPtn`$hGBQN3SO?-$kkMfHAByCDF2}R zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(4Q_ZG!IPKr5P9zCi_G;aa2Mn#^C zjBxZ1k8d2draYv3YVh8bL0Cn`R1fI6&%d;H+tv4cznCDF2}RgYpl` zKPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK z56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}R zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRo zp!|dK56V9%|DgPX@(;>CDF2}RgB_z2e^CBG`3L16lz&kELHP&e zAC!Mk{z3T%e^CBG`3L16lz*^c%)QxP-kD|I`mFBF?Sro`-CsP> zv1tGJE8ET$Ye^CBG`3L16lz&kELHP&eAC!Mk{z3T%e^CBG`3L16lz&kE zLHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T%e^CBG`3L16 zlz&kELHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T%dxQZ@3z)+ldD&!P2OeAe0nbSZB*`8 z+RLA}fu%nVh=>XLep2wEpF`eF4Q+8HtoVL7cA>?0BA=g)T6r+qWm(L)_hRKAlz&kE zLHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T%e^CBG`3L16 zlz&kELHP&eAC!Mk{z3T%e^CBG`3L16lz&kELHP&eAC!Mk{z3T% ze^CBG`3L16lz&kELHP&eAC!Nv#-Ud756V9%|DgPX@(;>CDF2}R zgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`Ke*+i$c~>xtQr!2 ztaDf`-_YHa!FSVxsxJwQZy#{}zQ5<%w!;JcicFt}UE36$^s?{VddndXH}6)-i``0Z zx;{1Dh+NZxKWj~I(=>N<e66rcgK~p>$cxhnCDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl` 
zKPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK z56V9%|DgPX@(;>CDF2}RgYpl`KPdm8{Dblj%0DRop!|dK56V9%|DgPX@(;>CDF2}R hgYpl`KPdm8{Dblj%0DRop!|dK56VCI|EYiQ{{ZfUN{0Xd literal 0 HcmV?d00001