Skip to content

Commit fc71209

Browse files
cynodesmus, coderabbitai[bot], ServeurpersoCom
authored
Fix Windows build errors and thread management for dynamic backend loading (-DGGML_BACKEND_DL=ON) (#22)
* Fix Windows MSVC build for GGML DL mode and update to Registry API
* Update src/backend.h

  Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
* Apply n_threads configuration to tokenizer backend
* Fix thread params in fallback path for tokenizer
* Standardize CPU backend initialization with thread params
* Remove outdated comment and finalize CPU initialization
* Finalize CPU initialization logic with thread param propagation
* Update CMakeLists.txt

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: Pascal <admin@serveurperso.com>
1 parent 36589b0 commit fc71209

3 files changed

Lines changed: 49 additions & 15 deletions

File tree

CMakeLists.txt

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,16 @@ macro(link_ggml_backends target)
4141
target_compile_options(${target} PRIVATE -Wall -Wextra -Wshadow -Wconversion
4242
-Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion)
4343
endif()
44-
target_link_libraries(${target} PRIVATE ggml ggml-base ggml-cpu)
45-
foreach(backend blas cuda metal vulkan)
44+
target_link_libraries(${target} PRIVATE ggml)
45+
if(TARGET ggml-base)
46+
target_link_libraries(${target} PRIVATE ggml-base)
47+
endif()
48+
foreach(backend cpu blas cuda metal vulkan)
4649
if(TARGET ggml-${backend})
47-
target_link_libraries(${target} PRIVATE ggml-${backend})
50+
get_target_property(CURRENT_BACKEND_TYPE ggml-${backend} TYPE)
51+
if (NOT CURRENT_BACKEND_TYPE STREQUAL "MODULE_LIBRARY")
52+
target_link_libraries(${target} PRIVATE ggml-${backend})
53+
endif()
4854
string(TOUPPER ${backend} BACKEND_UPPER)
4955
target_compile_definitions(${target} PRIVATE ACESTEP_HAVE_${BACKEND_UPPER})
5056
if(backend STREQUAL "cuda")

src/backend.h

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
// qwen3.h, qwen3-lm.h, cond.h, dit.h, vae.h.
77

88
#include "ggml-backend.h"
9-
#include "ggml-cpu.h"
109
#ifdef ACESTEP_HAVE_CUDA
1110
// Query compute capability without pulling in cuda_runtime.h.
1211
// cudaDeviceGetAttribute takes an int enum value; we pass the raw constants.
@@ -45,23 +44,34 @@ static BackendPair backend_init(const char * label) {
4544
fprintf(stderr, "[Load] FATAL: no backend available\n");
4645
exit(1);
4746
}
47+
bool best_is_cpu = (strcmp(ggml_backend_name(bp.backend), "CPU") == 0);
4848
int n_threads = (int) std::thread::hardware_concurrency() / 2;
4949
if (n_threads < 1) {
5050
n_threads = 1;
5151
}
52-
// [GGML] If best backend is already CPU, reuse it (avoid 2 CPU instances
53-
// where only one gets the thread count)
54-
bool best_is_cpu = (strcmp(ggml_backend_name(bp.backend), "CPU") == 0);
52+
// Initialize CPU backend with explicit thread count
53+
char params[64];
54+
snprintf(params, sizeof(params), "n_threads=%d", n_threads);
55+
auto init_cpu_backend = [&]() -> ggml_backend_t {
56+
ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
57+
if (cpu_dev) {
58+
if (ggml_backend_t cpu = ggml_backend_dev_init(cpu_dev, params)) {
59+
return cpu;
60+
}
61+
}
62+
return ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, params);
63+
};
64+
5565
if (best_is_cpu) {
66+
ggml_backend_free(bp.backend);
67+
bp.backend = init_cpu_backend();
5668
bp.cpu_backend = bp.backend;
57-
ggml_backend_cpu_set_n_threads(bp.backend, n_threads);
5869
} else {
59-
bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
60-
if (!bp.cpu_backend) {
61-
fprintf(stderr, "[Load] FATAL: failed to init CPU backend\n");
62-
exit(1);
63-
}
64-
ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads);
70+
bp.cpu_backend = init_cpu_backend();
71+
}
72+
if (!bp.cpu_backend) {
73+
fprintf(stderr, "[Load] FATAL: failed to init CPU backend\n");
74+
exit(1);
6575
}
6676
fprintf(stderr, "[Load] %s backend: %s (CPU threads: %d)\n", label, ggml_backend_name(bp.backend), n_threads);
6777

tools/ace-understand.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <cstring>
2929
#include <random>
3030
#include <string>
31+
#include <thread>
3132
#include <unordered_map>
3233
#include <vector>
3334

@@ -390,7 +391,24 @@ int main(int argc, char ** argv) {
390391
// Tokenizer weights live in the DiT GGUF (prefix "tokenizer.")
391392
Timer t_tok;
392393
TokGGML tok = {};
393-
ggml_backend_t be_tok = ggml_backend_cpu_init();
394+
ggml_backend_dev_t dev_cpu = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
395+
ggml_backend_t be_tok = NULL;
396+
int n_threads = (int) std::thread::hardware_concurrency() / 2;
397+
if (n_threads < 1) {
398+
n_threads = 1;
399+
}
400+
char params[64];
401+
snprintf(params, sizeof(params), "n_threads=%d", n_threads);
402+
if (dev_cpu) {
403+
be_tok = ggml_backend_dev_init(dev_cpu, params);
404+
}
405+
if (!be_tok) {
406+
be_tok = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, params);
407+
}
408+
if (!be_tok) {
409+
fprintf(stderr, "[Tok] FATAL: failed to init CPU backend\n");
410+
return 1;
411+
}
394412
if (!tok_ggml_load(&tok, dit_gguf, be_tok, be_tok)) {
395413
fprintf(stderr, "[Tok] FATAL: load failed\n");
396414
ggml_backend_free(be_tok);

0 commit comments

Comments
 (0)