2525#include " latent-preview.h"
2626#include " name_conversion.h"
2727
28+ #if SD_USE_RPC
29+ #include " ggml-rpc.h"
30+ #endif
31+
2832const char * model_version_to_str[] = {
2933 " SD 1.x" ,
3034 " SD 1.x Inpaint" ,
@@ -834,7 +838,13 @@ class StableDiffusionGGML {
834838 }
835839 return false ;
836840 };
837- if (!pmid_lora->load_from_file (n_threads, lora_tensor_filter)) {
841+ int n_th = n_threads;
842+ #ifdef SD_USE_RPC
843+ if (ggml_backend_is_rpc (diffusion_backend)) {
844+ n_th = 1 ; // avoid multi-thread for loading to remote
845+ }
846+ #endif
847+ if (!pmid_lora->load_from_file (n_th, lora_tensor_filter)) {
838848 LOG_WARN (" load photomaker lora tensors from %s failed" , sd_ctx_params->photo_maker_path );
839849 return false ;
840850 }
@@ -929,7 +939,22 @@ class StableDiffusionGGML {
929939 if (version == VERSION_SVD) {
930940 ignore_tensors.insert (" conditioner.embedders.3" );
931941 }
932- bool success = model_loader.load_tensors (tensors, ignore_tensors, n_threads, sd_ctx_params->enable_mmap );
942+ int n_th = n_threads;
943+ #ifdef SD_USE_RPC
944+ // TODO: maybe use a single thread only for the model parts that are placed on a remote (RPC) backend?
945+ bool is_any_clip_rpc = false ;
946+ for (auto & backend : clip_backends) {
947+ if (ggml_backend_is_rpc (backend)) {
948+ is_any_clip_rpc = true ;
949+ }
950+ }
951+ // As far as I can tell, these are all the backends that receive data when model_loader.load_tensors() is called
952+ if (is_any_clip_rpc || ggml_backend_is_rpc (diffusion_backend) || ggml_backend_is_rpc (vae_backend) || ggml_backend_is_rpc (vision_backend) || ggml_backend_is_rpc (pmid_backend)) {
953+ LOG_DEBUG (" Using single-thread for tensor loading because RPC backend is used" );
954+ n_th = 1 ; // avoid multi-thread for loading to remote
955+ }
956+ #endif
957+ bool success = model_loader.load_tensors (tensors, ignore_tensors, n_th, sd_ctx_params->enable_mmap );
933958 if (!success) {
934959 LOG_ERROR (" load tensors from model loader failed" );
935960 ggml_free (ctx);
@@ -949,15 +974,27 @@ class StableDiffusionGGML {
949974 vae_params_mem_size = first_stage_model->get_params_buffer_size ();
950975 }
951976 if (use_tiny_autoencoder || version == VERSION_SDXS) {
952- if (use_tiny_autoencoder && !tae_first_stage->load_from_file (taesd_path, n_threads)) {
977+ int n_th = n_threads;
978+ #ifdef SD_USE_RPC
979+ if (ggml_backend_is_rpc (tae_backend)) {
980+ n_th = 1 ; // avoid multi-thread for loading to remote
981+ }
982+ #endif
983+ if (use_tiny_autoencoder && !tae_first_stage->load_from_file (taesd_path, n_th)) {
953984 return false ;
954985 }
955986 use_tiny_autoencoder = true ; // now the processing is identical for VERSION_SDXS
956987 vae_params_mem_size = tae_first_stage->get_params_buffer_size ();
957988 }
958989 size_t control_net_params_mem_size = 0 ;
959990 if (control_net) {
960- if (!control_net->load_from_file (SAFE_STR (sd_ctx_params->control_net_path ), n_threads)) {
991+ int n_th = n_threads;
992+ #ifdef SD_USE_RPC
993+ if (ggml_backend_is_rpc (control_net_backend)) {
994+ n_th = 1 ; // avoid multi-thread for loading to remote
995+ }
996+ #endif
997+ if (!control_net->load_from_file (SAFE_STR (sd_ctx_params->control_net_path ), n_th)) {
961998 return false ;
962999 }
9631000 control_net_params_mem_size = control_net->get_params_buffer_size ();
@@ -1170,7 +1207,13 @@ class StableDiffusionGGML {
11701207 LOG_DEBUG (" high noise lora: %s" , lora_path.c_str ());
11711208 }
11721209 auto lora = std::make_shared<LoraModel>(lora_id, backend, lora_path, is_high_noise ? " model.high_noise_" : " " , version);
1173- if (!lora->load_from_file (n_threads, lora_tensor_filter)) {
1210+ int n_th = n_threads;
1211+ #ifdef SD_USE_RPC
1212+ if (ggml_backend_is_rpc (backend)) {
1213+ n_th = 1 ; // avoid multi-thread for loading to remote
1214+ }
1215+ #endif
1216+ if (!lora->load_from_file (n_th, lora_tensor_filter)) {
11741217 LOG_WARN (" load lora tensors from %s failed" , lora_path.c_str ());
11751218 return nullptr ;
11761219 }
0 commit comments