Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ jobs:
./build/tools/run_tests
./build/tools/benchmodel ./example_models/wavenet.nam
./build/tools/benchmodel ./example_models/lstm.nam
./build/tools/render ./example_models/wavenet.nam ./example_audio/input.wav ./example_audio/output.wav
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@

docs/_build/
*.DS_Store

example_audio/output.wav
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "Dependencies/eigen"]
path = Dependencies/eigen
url = https://gitlab.com/libeigen/eigen
[submodule "Dependencies/AudioDSPTools"]
path = Dependencies/AudioDSPTools
url = https://github.com/sdatkinson/AudioDSPTools.git
1 change: 1 addition & 0 deletions Dependencies/AudioDSPTools
Submodule AudioDSPTools added at 0827c6
4 changes: 4 additions & 0 deletions NAM/conv1d.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "conv1d.h"
#include "profiling.h"
#include <stdexcept>

namespace nam
Expand Down Expand Up @@ -143,6 +144,9 @@ void Conv1D::SetMaxBufferSize(const int maxBufferSize)

void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
{
// Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp)
// to avoid double-counting when Conv1D is called from within profiled blocks.

// Write input to ring buffer
_input_buffer.Write(input, num_frames);

Expand Down
4 changes: 4 additions & 0 deletions NAM/dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <unordered_set>

#include "dsp.h"
#include "profiling.h"
#include "registry.h"

#define tanh_impl_ std::tanh
Expand Down Expand Up @@ -443,6 +444,9 @@ Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int nu

void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, const int num_frames)
{
// Note: Profiling is done at the caller level (e.g., _Layer::Process in wavenet.cpp)
// to provide meaningful categories (input_mixin, layer1x1, head1x1, rechannel)
// rather than generic conv1x1.
assert(num_frames <= _output.cols());

if (this->_is_depthwise)
Expand Down
4 changes: 4 additions & 0 deletions NAM/film.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,13 @@ class FiLM
assert(num_frames <= condition.cols());
assert(num_frames <= _output.cols());

// Conv1x1 to compute scale/shift from condition
_cond_to_scale_shift.process_(condition, num_frames);
const auto& scale_shift = _cond_to_scale_shift.GetOutput();

// Note: FiLM time is included in the caller's profiling category (e.g., conv1d, input_mixin)
// rather than tracked separately, to avoid double-counting.

const auto scale = scale_shift.topRows(get_input_dim()).leftCols(num_frames);
if (_do_shift)
{
Expand Down
88 changes: 88 additions & 0 deletions NAM/profiling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#include "profiling.h"

#ifdef NAM_PROFILING

#if defined(__ARM_ARCH_7EM__) || defined(ARM_MATH_CM7)
// ARM Cortex-M7: Use DWT cycle counter for precise timing
#include "stm32h7xx.h"

namespace nam {
namespace profiling {

ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {};
int g_num_entries = 0;

// CPU frequency in MHz (Daisy runs at 480 MHz)
static constexpr uint32_t CPU_FREQ_MHZ = 480;

// Returns a free-running timestamp in microseconds derived from the DWT cycle
// counter. The value wraps modulo 2^32 us, so unsigned subtraction of two
// timestamps (as done by the NAM_PROFILE_* macros) yields the elapsed time.
//
// Why not simply `DWT->CYCCNT / CPU_FREQ_MHZ`: the quotient of a wrapping cycle
// count snaps back to 0 whenever the cycle counter wraps (about every 8.9 s at
// 480 MHz), and `now - start` across that point produces a bogus delta close to
// 2^32 us. Accumulating modular cycle deltas keeps the result continuous.
//
// Requirements: must be called at least once per cycle-counter wrap period,
// and is not reentrant (keeps unsynchronized static state).
// NOTE(review): assumes DWT->CYCCNT is a 32-bit register that has been enabled
// elsewhere - confirm against the board setup code.
uint32_t get_time_us() {
  // Constant-initialized statics: no dynamic-init guard on the hot path.
  static uint32_t last_cycles = 0;
  static uint32_t elapsed_us = 0;
  static uint32_t leftover_cycles = 0; // always < CPU_FREQ_MHZ

  const uint32_t now_cycles = DWT->CYCCNT;
  const uint32_t delta_cycles = now_cycles - last_cycles; // modular: correct across one wrap
  last_cycles = now_cycles;

  // 32-bit only arithmetic; carry sub-microsecond cycles forward so that
  // rounding error does not accumulate between calls.
  elapsed_us += delta_cycles / CPU_FREQ_MHZ;
  leftover_cycles += delta_cycles % CPU_FREQ_MHZ;
  if (leftover_cycles >= CPU_FREQ_MHZ) {
    leftover_cycles -= CPU_FREQ_MHZ;
    ++elapsed_us;
  }
  return elapsed_us;
}

} // namespace profiling
} // namespace nam

#else
// Non-ARM: Use std::chrono for timing (for testing on desktop)
#include <chrono>

namespace nam {
namespace profiling {

ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {};
int g_num_entries = 0;

// Returns the number of microseconds elapsed since the first call to this
// function, truncated to 32 bits (wraps after roughly 71 minutes).
//
// Uses std::chrono::steady_clock because interval measurements need a monotonic
// clock; high_resolution_clock may alias the system clock, which can jump when
// the wall-clock time is adjusted.
uint32_t get_time_us() {
  using clock = std::chrono::steady_clock;
  // Captured once on first use; all later timestamps are relative to it.
  static const clock::time_point start = clock::now();
  const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(clock::now() - start);
  return static_cast<uint32_t>(elapsed.count());
}

} // namespace profiling
} // namespace nam

#endif // ARM check

namespace nam {
namespace profiling {

int register_type(const char* name) {
int idx = g_num_entries++;
g_entries[idx].name = name;
g_entries[idx].accumulated_us = 0;
return idx;
}

void reset() {
for (int i = 0; i < g_num_entries; i++)
g_entries[i].accumulated_us = 0;
}

// Prints a table of accumulated time per registered category to stdout:
// one row per category with non-zero time (milliseconds and integer percent
// of the grand total), followed by a "Total" row.
//
// Sums and the percentage product are computed in 64 bits: with 32-bit
// arithmetic `us * 100` overflows once a single category exceeds ~42.9 s, and
// the grand total can wrap when the per-category counters add up past 2^32 us.
void print_results() {
  uint64_t total = 0;
  for (int i = 0; i < g_num_entries; i++)
    total += g_entries[i].accumulated_us;

  printf("\nProfiling breakdown:\n");
  printf("%-12s %8s %6s\n", "Category", "Time(ms)", "%");
  printf("%-12s %8s %6s\n", "--------", "--------", "----");

  for (int i = 0; i < g_num_entries; i++) {
    const uint32_t us = g_entries[i].accumulated_us;
    if (us == 0)
      continue; // categories that never ran are omitted from the report

    // us > 0 implies total >= us > 0, so the division is safe.
    const uint64_t pct = (static_cast<uint64_t>(us) * 100u) / total;
    printf("%-12s %8.1f %5lu%%\n", g_entries[i].name, us / 1000.0, static_cast<unsigned long>(pct));
  }

  printf("%-12s %8s %6s\n", "--------", "--------", "----");
  printf("%-12s %8.1f %5s\n", "Total", static_cast<double>(total) / 1000.0, "100%");
}

} // namespace profiling
} // namespace nam

#endif // NAM_PROFILING
85 changes: 85 additions & 0 deletions NAM/profiling.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#pragma once

// Dynamic profiling registry for NAM building blocks
// Enable with -DNAM_PROFILING
//
// Usage:
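// 1. Register profiling types at file scope (static init). register_type() is only
//    declared when NAM_PROFILING is defined, so wrap registrations in
//    #ifdef NAM_PROFILING / #endif:
//      static int PROF_FOO = nam::profiling::register_type("Foo");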
// 2. Call nam::profiling::reset() before benchmark
// 3. In hot path:
// NAM_PROFILE_START();
// // ... code ...
// NAM_PROFILE_ADD(PROF_FOO);
// 4. Call nam::profiling::print_results() to display breakdown

#ifdef NAM_PROFILING

#include <cstdint>
#include <cstdio>

namespace nam {
namespace profiling {

constexpr int MAX_PROFILING_TYPES = 32;

// One slot of the profiling registry: a category label plus its running total.
struct ProfilingEntry {
  // Label shown in the report; the pointer is stored, not copied.
  const char* name;
  // Time attributed to this category so far, in microseconds.
  std::uint32_t accumulated_us;
};

extern ProfilingEntry g_entries[MAX_PROFILING_TYPES];
extern int g_num_entries;

// Register a named profiling type. Returns index for fast accumulation.
// Called at static-init time or during setup, NOT in the hot path.
int register_type(const char* name);

// Get current time in microseconds (platform-specific)
uint32_t get_time_us();

// Reset all profiling counters
void reset();

// Print profiling results to stdout
void print_results();

// Helper macros for timing sections
// Usage:
// NAM_PROFILE_START();
// // ... code to profile ...
// NAM_PROFILE_ADD(PROF_FOO); // Adds elapsed time to entry, resets timer
//
// All macros share a local variable named `_prof_start`:
// - NAM_PROFILE_START() declares it, so it can appear only once per scope.
// - The other macros read or assign it, so they must be used where a preceding
//   NAM_PROFILE_START() is in scope.
// Elapsed time is the unsigned 32-bit difference of two get_time_us() values.
// `idx` is used directly as an index into g_entries with no range check.

// Declares `_prof_start` and initializes it with the current timestamp.
#define NAM_PROFILE_START() uint32_t _prof_start = nam::profiling::get_time_us()
// Adds the time since `_prof_start` to entry `idx`, then moves `_prof_start`
// to the timestamp just taken so consecutive sections are timed back-to-back.
#define NAM_PROFILE_ADD(idx) do { \
uint32_t _prof_now = nam::profiling::get_time_us(); \
nam::profiling::g_entries[idx].accumulated_us += (_prof_now - _prof_start); \
_prof_start = _prof_now; \
} while(0)

// Variant that doesn't reset the timer (for one-shot measurements)
// Expands to a single expression; `_prof_start` is left unchanged.
#define NAM_PROFILE_ADD_NORESTART(idx) \
nam::profiling::g_entries[idx].accumulated_us += (nam::profiling::get_time_us() - _prof_start)

// Reset the timer without recording (for re-syncing mid-function)
// Time elapsed since the previous start/add is discarded, not attributed to any entry.
#define NAM_PROFILE_RESTART() _prof_start = nam::profiling::get_time_us()

} // namespace profiling
} // namespace nam

#else // NAM_PROFILING not defined

// No-op macros when profiling is disabled
#define NAM_PROFILE_START() ((void)0)
#define NAM_PROFILE_ADD(idx) ((void)0)
#define NAM_PROFILE_ADD_NORESTART(idx) ((void)0)
#define NAM_PROFILE_RESTART() ((void)0)

namespace nam {
namespace profiling {

// Profiling is compiled out: these stand-ins let callers invoke reset() and
// print_results() unconditionally without their own #ifdef NAM_PROFILING.

// No-op replacement for the counter reset.
inline void reset()
{
}

// No-op replacement for the report printer; produces no output.
inline void print_results()
{
}

} // namespace profiling
} // namespace nam

#endif // NAM_PROFILING
31 changes: 31 additions & 0 deletions NAM/wavenet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,20 @@
#include <Eigen/Dense>

#include "get_dsp.h"
#include "profiling.h"
#include "registry.h"
#include "wavenet.h"

#ifdef NAM_PROFILING
// Profiling categories used by this translation unit. Each is registered once
// during static initialization; the registration order below determines the
// index assigned to each category.
namespace
{
const int PROF_CONV1D = nam::profiling::register_type("Conv1D");
const int PROF_INPUT_MIXIN = nam::profiling::register_type("InputMixin");
const int PROF_LAYER1X1 = nam::profiling::register_type("Layer1x1");
const int PROF_HEAD1X1 = nam::profiling::register_type("Head1x1");
const int PROF_RECHANNEL = nam::profiling::register_type("Rechannel");
const int PROF_ACTIVATION = nam::profiling::register_type("Activation");
const int PROF_COPIES = nam::profiling::register_type("Copies");
} // namespace
#endif

// Layer ======================================================================

void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize)
Expand Down Expand Up @@ -89,6 +100,8 @@ void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)

void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames)
{
NAM_PROFILE_START();

const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels

// Step 1: input convolutions
Expand All @@ -107,6 +120,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
Eigen::MatrixXf& conv_output = this->_conv.GetOutput();
this->_conv_post_film->Process_(conv_output, condition, num_frames);
}
NAM_PROFILE_ADD(PROF_CONV1D);

if (this->_input_mixin_pre_film)
{
Expand All @@ -123,8 +137,12 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
Eigen::MatrixXf& input_mixin_output = this->_input_mixin.GetOutput();
this->_input_mixin_post_film->Process_(input_mixin_output, condition, num_frames);
}
NAM_PROFILE_ADD(PROF_INPUT_MIXIN);

this->_z.leftCols(num_frames).noalias() =
_conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames);
NAM_PROFILE_ADD(PROF_COPIES);

if (this->_activation_pre_film)
{
this->_activation_pre_film->Process_(this->_z, condition, num_frames);
Expand All @@ -139,13 +157,15 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
if (this->_gating_mode == GatingMode::NONE)
{
this->_activation->apply(this->_z.leftCols(num_frames));
NAM_PROFILE_ADD(PROF_ACTIVATION);
if (this->_activation_post_film)
{
this->_activation_post_film->Process_(this->_z, condition, num_frames);
}
if (this->_layer1x1)
{
this->_layer1x1->process_(this->_z, num_frames);
NAM_PROFILE_ADD(PROF_LAYER1X1);
}
}
else if (this->_gating_mode == GatingMode::GATED)
Expand All @@ -155,6 +175,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
auto input_block = this->_z.leftCols(num_frames);
auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames);
this->_gating_activation->apply(input_block, output_block);
NAM_PROFILE_ADD(PROF_ACTIVATION);
if (this->_activation_post_film)
{
// Use Process() for blocks and copy result back
Expand All @@ -165,6 +186,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
if (this->_layer1x1)
{
this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames);
NAM_PROFILE_ADD(PROF_LAYER1X1);
}
}
else if (this->_gating_mode == GatingMode::BLENDED)
Expand All @@ -174,6 +196,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
auto input_block = this->_z.leftCols(num_frames);
auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames);
this->_blending_activation->apply(input_block, output_block);
NAM_PROFILE_ADD(PROF_ACTIVATION);
if (this->_activation_post_film)
{
// Use Process() for blocks and copy result back
Expand All @@ -184,6 +207,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
if (this->_layer1x1)
{
this->_layer1x1->process_(this->_z.topRows(bottleneck), num_frames);
NAM_PROFILE_ADD(PROF_LAYER1X1);
if (this->_layer1x1_post_film)
{
Eigen::MatrixXf& layer1x1_output = this->_layer1x1->GetOutput();
Expand All @@ -207,6 +231,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
Eigen::MatrixXf& head1x1_output = this->_head1x1->GetOutput();
this->_head1x1_post_film->Process_(head1x1_output, condition, num_frames);
}
NAM_PROFILE_ADD(PROF_HEAD1X1);
this->_output_head.leftCols(num_frames).noalias() = this->_head1x1->GetOutput().leftCols(num_frames);
}
else // No head 1x1
Expand All @@ -230,6 +255,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
// If layer1x1 is inactive, residual connection is just the input (identity)
this->_output_next_layer.leftCols(num_frames).noalias() = input.leftCols(num_frames);
}
NAM_PROFILE_ADD(PROF_COPIES);
}

// LayerArray =================================================================
Expand Down Expand Up @@ -298,9 +324,12 @@ void nam::wavenet::_LayerArray::Process(const Eigen::MatrixXf& layer_inputs, con
void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition,
const int num_frames)
{
NAM_PROFILE_START();

// Process rechannel and get output
this->_rechannel.process_(layer_inputs, num_frames);
Eigen::MatrixXf& rechannel_output = _rechannel.GetOutput();
NAM_PROFILE_ADD(PROF_RECHANNEL);

// Process layers
for (size_t i = 0; i < this->_layers.size(); i++)
Expand Down Expand Up @@ -329,7 +358,9 @@ void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs
this->_layers[last_layer].GetOutputNextLayer().leftCols(num_frames);

// Process head rechannel
NAM_PROFILE_RESTART();
_head_rechannel.process_(this->_head_inputs, num_frames);
NAM_PROFILE_ADD(PROF_RECHANNEL);
}


Expand Down
Binary file added example_audio/input.wav
Binary file not shown.
Loading