@@ -150,14 +150,6 @@ bool TfBuilderInputUCX::start()
150150 return false ;
151151 }
152152
153- // start receiving thread pool
154- for (unsigned i = 0 ; i < mThreadPoolSize ; i++) {
155- std::string lThreadName = " tfb_ucx_" ;
156- lThreadName += std::to_string (i);
157-
158- mThreadPool .emplace_back (std::move (create_thread_member (lThreadName.c_str (),
159- &TfBuilderInputUCX::DataHandlerThread, this , i)));
160- }
161153
162154 // Create the listener
163155 // Run the connection callback with pointer to us
@@ -284,6 +276,18 @@ bool TfBuilderInputUCX::map_data_region()
284276 mTimeFrameBuilder .mMemRes .mDataMemRes ->set_ucx_address (lUcxMemPtr);
285277 ucp_data_region_set = true ;
286278 DDDLOG (" TfBuilderInputUCX::map_data_region(): mapped the data region size={}" , lOrigSize);
279+
280+ // start receiving thread pool
281+ // NOTE: This must come after the region mapping. Threads are using mapped addresses
282+ for (unsigned i = 0 ; i < mThreadPoolSize ; i++) {
283+ std::string lThreadName = " tfb_ucx_" ;
284+ lThreadName += std::to_string (i);
285+
286+ mThreadPool .emplace_back (std::move (
287+ create_thread_member (lThreadName.c_str (), &TfBuilderInputUCX::DataHandlerThread, this , i))
288+ );
289+ }
290+
287291 return true ;
288292}
289293
@@ -354,9 +358,17 @@ void TfBuilderInputUCX::DataHandlerThread(const unsigned pThreadIdx)
354358 // Deserialization object (stf ID)
355359 IovDeserializer lStfReceiver (mTimeFrameBuilder );
356360
357- { // warm up FMQ region caches for this thread
358- mTimeFrameBuilder .newDataMessage (1 );
359- }
361+ // memory for meta-tag receive; increased later if needed
362+ std::uint64_t lMetaMemSize = std::uint64_t (2 ) << 20 ;
363+ FairMQMessagePtr lMetaMemMsg = nullptr ;
364+ void *lMetaMemPtr = nullptr ;
365+
366+ auto fAllocateMetaMessage = [&](std::uint64_t pSize) {
367+ lMetaMemSize = pSize;
368+ lMetaMemMsg = mTimeFrameBuilder .newDataMessage (pSize);
369+ lMetaMemPtr = mTimeFrameBuilder .mMemRes .mDataMemRes ->get_ucx_ptr (lMetaMemMsg->GetData ());
370+ };
371+ fAllocateMetaMessage (lMetaMemSize);
360372
361373 std::optional<std::string> lStfSenderIdOpt;
362374 std::vector<void *> lTxgPtrs;
@@ -392,19 +404,32 @@ void TfBuilderInputUCX::DataHandlerThread(const unsigned pThreadIdx)
392404 auto lStartLoop = clock::now ();
393405
394406 // Receive STF iov and metadata
395- const auto lStfMetaDataOtp = ucx::io::ucx_receive_string (lConn-> worker ) ;
396-
397- if (!lStfMetaDataOtp. has_value () ) {
398- EDDLOG (" DataHandlerThread {} : Failed to receive stf meta structure." , lStfSenderId);
407+ std:: uint64_t lReqSize = 0 ;
408+ auto lRecvMetaSize = ucx::io::ucx_receive_tag (lConn-> worker , lMetaMemPtr, lMetaMemSize, &lReqSize);
409+ if (lRecvMetaSize < 0 ) {
410+ EDDLOG (" UCXDataHandlerThread : Failed to receive stf meta structure. from={} " , lStfSenderId);
399411 continue ;
412+ } if ((lRecvMetaSize == 0 ) && (lReqSize > lMetaMemSize)) {
413+ // memory too small
414+ while (lMetaMemSize < lReqSize) {
415+ lMetaMemSize *= 2 ;
416+ }
417+ // allocate larger buffer and continue
418+ fAllocateMetaMessage (lMetaMemSize);
419+ lRecvMetaSize = ucx::io::ucx_receive_tag_data (lConn->worker , lMetaMemPtr, lReqSize);
420+ if (lRecvMetaSize < 0 ) {
421+ EDDLOG (" UCXDataHandlerThread: Failed to receive stf meta message. from={}" , lStfSenderId);
422+ continue ;
423+ }
424+ assert (lRecvMetaSize > 0 && std::uint64_t (lRecvMetaSize) == lReqSize);
400425 }
401426
402427 DDMON (" tfbuilder" , " recv.receive_meta_ms" , since<std::chrono::milliseconds>(lStartLoop));
403428 lMetaDecodeStart = clock::now ();
404429
405- const auto &lStfMetaData = lStfMetaDataOtp. value ();
406-
407- lMeta. ParseFromString (lStfMetaData);
430+ if (!lMeta. ParseFromArray (lMetaMemPtr, lRecvMetaSize)) {
431+ EDDLOG ( " UCXDataHandlerThread: Failed to parse stf meta message. from={} size={} " ,lStfSenderId, lRecvMetaSize);
432+ }
408433
409434 lTfId = lMeta.stf_hdr_meta ().stf_id ();
410435
@@ -459,8 +484,14 @@ void TfBuilderInputUCX::DataHandlerThread(const unsigned pThreadIdx)
459484 }
460485
461486 // notify StfSender we completed
462- std::string lOkStr = " OK" ;
463- if (!ucx::io::ucx_send_string (lConn->worker , lConn->ucp_ep , lOkStr) ) {
487+ struct StringData {
488+ std::uint64_t mSize ;
489+ char mMsg [4 ];
490+ } *lOk = reinterpret_cast <struct StringData *>(lMetaMemPtr);
491+ lOk->mSize = 2 ;
492+ std::memcpy (lOk->mMsg , " OK" , 2 );
493+
494+ if (!ucx::io::ucx_send_data (lConn->worker , lConn->ucp_ep , lOk->mMsg , &lOk->mSize ) ) {
464495 EDDLOG_GRL (10000 , " StfSender was NOT notified about transfer finish stf_sender={} tf_id={}" , lStfSenderId, lTfId);
465496 }
466497
0 commit comments