Skip to content

Commit 3f62282

Browse files
committed
Force sequential tensor loading when using RPC
1 parent e511f77 commit 3f62282

3 files changed

Lines changed: 56 additions & 7 deletions

File tree

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ if(SD_MUSA)
8484
endif()
8585
endif()
8686

87+
if (SD_RPC)
88+
message("-- Use RPC as backend stable-diffusion")
89+
set(GGML_RPC ON)
90+
add_definitions(-DSD_USE_RPC)
91+
endif ()
92+
8793
set(SD_LIB stable-diffusion)
8894

8995
file(GLOB SD_LIB_SOURCES

docs/rpc.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ This guide covers how to build a version of [the RPC server from `llama.cpp`](ht
66
77
## 1. Building `stable-diffusion.cpp` with RPC client
88

9-
First, you should build the client application from source. It requires `GGML_RPC=ON` to include the RPC backend to your client.
9+
First, you should build the client application from source. It requires `SD_RPC=ON` to include the RPC backend to your client.
1010

1111
```bash
1212
mkdir build
1313
cd build
1414
cmake .. \
15-
-DGGML_RPC=ON \
15+
-DSD_RPC=ON \
1616
# Add other build flags here (e.g., -DSD_VULKAN=ON)
1717
cmake --build . --config Release -j $(nproc)
1818
```

src/stable-diffusion.cpp

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@
2525
#include "latent-preview.h"
2626
#include "name_conversion.h"
2727

28+
#if SD_USE_RPC
29+
#include "ggml-rpc.h"
30+
#endif
31+
2832
const char* model_version_to_str[] = {
2933
"SD 1.x",
3034
"SD 1.x Inpaint",
@@ -834,7 +838,13 @@ class StableDiffusionGGML {
834838
}
835839
return false;
836840
};
837-
if (!pmid_lora->load_from_file(n_threads, lora_tensor_filter)) {
841+
int n_th = n_threads;
842+
#ifdef SD_USE_RPC
843+
if (ggml_backend_is_rpc(diffusion_backend)) {
844+
n_th = 1; // avoid multi-thread for loading to remote
845+
}
846+
#endif
847+
if (!pmid_lora->load_from_file(n_th, lora_tensor_filter)) {
838848
LOG_WARN("load photomaker lora tensors from %s failed", sd_ctx_params->photo_maker_path);
839849
return false;
840850
}
@@ -929,7 +939,22 @@ class StableDiffusionGGML {
929939
if (version == VERSION_SVD) {
930940
ignore_tensors.insert("conditioner.embedders.3");
931941
}
932-
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads, sd_ctx_params->enable_mmap);
942+
int n_th = n_threads;
943+
#ifdef SD_USE_RPC
944+
// TODO: maybe set it to 1 threads only for model parts that are on remote?
945+
bool is_any_clip_rpc = false;
946+
for (auto& backend : clip_backends) {
947+
if (ggml_backend_is_rpc(backend)) {
948+
is_any_clip_rpc = true;
949+
}
950+
}
951+
// I think those are all the backends that should get sent data to when calling model_loader.load_tensors()
952+
if (is_any_clip_rpc || ggml_backend_is_rpc(diffusion_backend) || ggml_backend_is_rpc(vae_backend) || ggml_backend_is_rpc(vision_backend) || ggml_backend_is_rpc(pmid_backend)) {
953+
LOG_DEBUG("Using single-thread for tensor loading because RPC backend is used");
954+
n_th = 1; // avoid multi-thread for loading to remote
955+
}
956+
#endif
957+
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_th, sd_ctx_params->enable_mmap);
933958
if (!success) {
934959
LOG_ERROR("load tensors from model loader failed");
935960
ggml_free(ctx);
@@ -949,15 +974,27 @@ class StableDiffusionGGML {
949974
vae_params_mem_size = first_stage_model->get_params_buffer_size();
950975
}
951976
if (use_tiny_autoencoder || version == VERSION_SDXS) {
952-
if (use_tiny_autoencoder && !tae_first_stage->load_from_file(taesd_path, n_threads)) {
977+
int n_th = n_threads;
978+
#ifdef SD_USE_RPC
979+
if (ggml_backend_is_rpc(tae_backend)) {
980+
n_th = 1; // avoid multi-thread for loading to remote
981+
}
982+
#endif
983+
if (use_tiny_autoencoder && !tae_first_stage->load_from_file(taesd_path, n_th)) {
953984
return false;
954985
}
955986
use_tiny_autoencoder = true; // now the processing is identical for VERSION_SDXS
956987
vae_params_mem_size = tae_first_stage->get_params_buffer_size();
957988
}
958989
size_t control_net_params_mem_size = 0;
959990
if (control_net) {
960-
if (!control_net->load_from_file(SAFE_STR(sd_ctx_params->control_net_path), n_threads)) {
991+
int n_th = n_threads;
992+
#ifdef SD_USE_RPC
993+
if (ggml_backend_is_rpc(control_net_backend)) {
994+
n_th = 1; // avoid multi-thread for loading to remote
995+
}
996+
#endif
997+
if (!control_net->load_from_file(SAFE_STR(sd_ctx_params->control_net_path), n_th)) {
961998
return false;
962999
}
9631000
control_net_params_mem_size = control_net->get_params_buffer_size();
@@ -1170,7 +1207,13 @@ class StableDiffusionGGML {
11701207
LOG_DEBUG("high noise lora: %s", lora_path.c_str());
11711208
}
11721209
auto lora = std::make_shared<LoraModel>(lora_id, backend, lora_path, is_high_noise ? "model.high_noise_" : "", version);
1173-
if (!lora->load_from_file(n_threads, lora_tensor_filter)) {
1210+
int n_th = n_threads;
1211+
#ifdef SD_USE_RPC
1212+
if (ggml_backend_is_rpc(backend)) {
1213+
n_th = 1; // avoid multi-thread for loading to remote
1214+
}
1215+
#endif
1216+
if (!lora->load_from_file(n_th, lora_tensor_filter)) {
11741217
LOG_WARN("load lora tensors from %s failed", lora_path.c_str());
11751218
return nullptr;
11761219
}

0 commit comments

Comments (0)