Open
Conversation
abbb997 to
d2d694e
Compare
Member
Author
|
On master for Julia 1.10:
julia> @code_llvm MPI.Iprobe(MPI.COMM_WORLD)
; @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe`
define i8 @julia_Iprobe_610({}* noundef nonnull align 4 dereferenceable(4) %0) #0 {
L18:
%1 = alloca {}*, align 8
; @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe` @ /home/vchuravy/src/MPI/src/nonblocking.jl:395
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:398 within `Iprobe`
; ││┌ @ refpointer.jl:138 within `Ref`
; │││┌ @ refvalue.jl:7 within `RefValue`
%gcframe7 = alloca [3 x {}*], align 16
%gcframe7.sub = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 0
%2 = bitcast [3 x {}*]* %gcframe7 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %2, i8 0, i64 24, i1 true)
%3 = alloca i32, align 8
%4 = bitcast i32* %3 to i8*
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #11
%tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
%5 = bitcast i8* %tls_ppgcstack to {}****
%tls_pgcstack = load {}***, {}**** %5, align 8
; └└└└
; ┌ @ refvalue.jl:59 within `getindex`
; │┌ @ Base.jl:37 within `getproperty`
%6 = bitcast [3 x {}*]* %gcframe7 to i64*
store i64 4, i64* %6, align 16
%7 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 1
%8 = bitcast {}** %7 to {}***
%9 = load {}**, {}*** %tls_pgcstack, align 8
store {}** %9, {}*** %8, align 8
%10 = bitcast {}*** %tls_pgcstack to {}***
store {}** %gcframe7.sub, {}*** %10, align 8
%11 = load i32, i32* inttoptr (i64 140701644651504 to i32*), align 16
%12 = load i32, i32* inttoptr (i64 140701644651408 to i32*), align 16
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4)
; └└
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:399 within `Iprobe`
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/comm.jl:11 within `unsafe_convert`
; ││││┌ @ Base.jl:37 within `getproperty`
%13 = bitcast {}* %0 to i32*
%14 = load i32, i32* %13, align 4
; ││└└└
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
%15 = load i64, i64* inttoptr (i64 140701644630800 to i64*), align 16
; ││└└
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ refvalue.jl:53 within `unsafe_convert`
; ││││┌ @ pointer.jl:30 within `convert`
%bitcast_coercion2 = ptrtoint i32* %3 to i64
; │││└└
%16 = call i32 inttoptr (i64 140701651319248 to i32 (i32, i32, i32, i64, i64)*)(i32 %11, i32 %12, i32 %14, i64 %bitcast_coercion2, i64 %15)
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
%.not = icmp eq i32 %16, 0
; │││└
br i1 %.not, label %L29, label %L26
L26: ; preds = %L18
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4)
%17 = call nonnull {}* @ijl_box_int32(i32 signext %16)
%18 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 2
store {}* %17, {}** %18, align 16
store {}* %17, {}** %1, align 8
%19 = call nonnull {}* @ijl_apply_generic({}* inttoptr (i64 140701645548176 to {}*), {}** nonnull %1, i32 1)
call void @ijl_throw({}* %19)
unreachable
L29: ; preds = %L18
; ││└
; ││ @ /home/vchuravy/src/MPI/src/nonblocking.jl:400 within `Iprobe`
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
%20 = load i32, i32* %3, align 8
; ││└└
; ││┌ @ operators.jl:276 within `!=`
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
%21 = icmp ne i32 %20, 0
; └└└└
%22 = zext i1 %21 to i8
%23 = load {}*, {}** %7, align 8
%24 = bitcast {}*** %tls_pgcstack to {}**
store {}* %23, {}** %24, align 8
ret i8 %22
}
On this branch:
julia> @code_llvm MPI.Iprobe(MPI.COMM_WORLD)
; @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe`
define i8 @julia_Iprobe_437({}* noundef nonnull align 4 dereferenceable(4) %0) #0 {
L18:
; @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe` @ /home/vchuravy/src/MPI/src/nonblocking.jl:395
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:398 within `Iprobe`
; ││┌ @ refpointer.jl:138 within `Ref`
; │││┌ @ refvalue.jl:7 within `RefValue`
%1 = alloca i32, align 8
%2 = bitcast i32* %1 to i8*
; └└└└
; ┌ @ refvalue.jl:59 within `getindex`
; │┌ @ Base.jl:37 within `getproperty`
%3 = load i32, i32* inttoptr (i64 140538370873152 to i32*), align 64
%4 = load i32, i32* inttoptr (i64 140538370873056 to i32*), align 32
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2)
; └└
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:399 within `Iprobe`
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/api/api.jl:143 within `macro expansion`
; ││││┌ @ /home/vchuravy/src/MPI/src/comm.jl:11 within `unsafe_convert`
; │││││┌ @ Base.jl:37 within `getproperty`
%5 = bitcast {}* %0 to i32*
%6 = load i32, i32* %5, align 4
; ││└└└└
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
%7 = load i64, i64* inttoptr (i64 140538370852448 to i64*), align 32
; ││└└
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/api/api.jl:143 within `macro expansion`
; ││││┌ @ refvalue.jl:53 within `unsafe_convert`
; │││││┌ @ pointer.jl:30 within `convert`
%bitcast_coercion2 = ptrtoint i32* %1 to i64
; ││││└└
%8 = call i32 inttoptr (i64 140538377550288 to i32 (i32, i32, i32, i64, i64)*)(i32 %3, i32 %4, i32 %6, i64 %bitcast_coercion2, i64 %7)
; ││││ @ /home/vchuravy/src/MPI/src/api/api.jl:144 within `macro expansion`
; ││││┌ @ operators.jl:276 within `!=`
; │││││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
%.not = icmp eq i32 %8, 0
; ││││└└
br i1 %.not, label %L29, label %L26
L26: ; preds = %L18
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
; ││││ @ /home/vchuravy/src/MPI/src/api/api.jl:145 within `macro expansion`
call void @j_mpi_error_439(i32 signext %8) #4
unreachable
L29: ; preds = %L18
; ││└└
; ││ @ /home/vchuravy/src/MPI/src/nonblocking.jl:400 within `Iprobe`
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
%9 = load i32, i32* %1, align 8
; ││└└
; ││┌ @ operators.jl:276 within `!=`
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
%10 = icmp ne i32 %9, 0
; └└└└
%11 = zext i1 %10 to i8
ret i8 %11
} |
giordano
approved these changes
Mar 18, 2026
Member
Author
|
I have to correct myself: we are not reducing allocations (at least not in the example I show), but rather avoiding the unnecessary setup of a GC frame. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Manually outline the error path; otherwise, on Julia 1.10 we set up an unnecessary GC frame for every MPI function.