Skip to content

Outline error path#939

Open
vchuravy wants to merge 1 commit into master from
vc/outline_error
Open

Outline error path#939
vchuravy wants to merge 1 commit into master from
vc/outline_error

Conversation

@vchuravy
Copy link
Copy Markdown
Member

Manually outline the error path; otherwise, on Julia 1.10, we cause an unnecessary GC frame for every MPI function.

@vchuravy
Copy link
Copy Markdown
Member Author

vchuravy commented Mar 18, 2026

On master for Julia 1.10

julia> @code_llvm MPI.Iprobe(MPI.COMM_WORLD)
;  @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe`
define i8 @julia_Iprobe_610({}* noundef nonnull align 4 dereferenceable(4) %0) #0 {
L18:
  %1 = alloca {}*, align 8
;  @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe` @ /home/vchuravy/src/MPI/src/nonblocking.jl:395
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:398 within `Iprobe`
; ││┌ @ refpointer.jl:138 within `Ref`
; │││┌ @ refvalue.jl:7 within `RefValue`
      %gcframe7 = alloca [3 x {}*], align 16
      %gcframe7.sub = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 0
      %2 = bitcast [3 x {}*]* %gcframe7 to i8*
      call void @llvm.memset.p0i8.i64(i8* align 16 %2, i8 0, i64 24, i1 true)
      %3 = alloca i32, align 8
      %4 = bitcast i32* %3 to i8*
      %thread_ptr = call i8* asm "movq %fs:0, $0", "=r"() #11
      %tls_ppgcstack = getelementptr i8, i8* %thread_ptr, i64 -8
      %5 = bitcast i8* %tls_ppgcstack to {}****
      %tls_pgcstack = load {}***, {}**** %5, align 8
; └└└└
; ┌ @ refvalue.jl:59 within `getindex`
; │┌ @ Base.jl:37 within `getproperty`
    %6 = bitcast [3 x {}*]* %gcframe7 to i64*
    store i64 4, i64* %6, align 16
    %7 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 1
    %8 = bitcast {}** %7 to {}***
    %9 = load {}**, {}*** %tls_pgcstack, align 8
    store {}** %9, {}*** %8, align 8
    %10 = bitcast {}*** %tls_pgcstack to {}***
    store {}** %gcframe7.sub, {}*** %10, align 8
    %11 = load i32, i32* inttoptr (i64 140701644651504 to i32*), align 16
    %12 = load i32, i32* inttoptr (i64 140701644651408 to i32*), align 16
    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %4)
; └└
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:399 within `Iprobe`
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/comm.jl:11 within `unsafe_convert`
; ││││┌ @ Base.jl:37 within `getproperty`
       %13 = bitcast {}* %0 to i32*
       %14 = load i32, i32* %13, align 4
; ││└└└
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
      %15 = load i64, i64* inttoptr (i64 140701644630800 to i64*), align 16
; ││└└
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ refvalue.jl:53 within `unsafe_convert`
; ││││┌ @ pointer.jl:30 within `convert`
       %bitcast_coercion2 = ptrtoint i32* %3 to i64
; │││└└
     %16 = call i32 inttoptr (i64 140701651319248 to i32 (i32, i32, i32, i64, i64)*)(i32 %11, i32 %12, i32 %14, i64 %bitcast_coercion2, i64 %15)
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
      %.not = icmp eq i32 %16, 0
; │││└
     br i1 %.not, label %L29, label %L26

L26:                                              ; preds = %L18
     call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %4)
     %17 = call nonnull {}* @ijl_box_int32(i32 signext %16)
     %18 = getelementptr inbounds [3 x {}*], [3 x {}*]* %gcframe7, i64 0, i64 2
     store {}* %17, {}** %18, align 16
     store {}* %17, {}** %1, align 8
     %19 = call nonnull {}* @ijl_apply_generic({}* inttoptr (i64 140701645548176 to {}*), {}** nonnull %1, i32 1)
     call void @ijl_throw({}* %19)
     unreachable

L29:                                              ; preds = %L18
; ││└
; ││ @ /home/vchuravy/src/MPI/src/nonblocking.jl:400 within `Iprobe`
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
      %20 = load i32, i32* %3, align 8
; ││└└
; ││┌ @ operators.jl:276 within `!=`
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
      %21 = icmp ne i32 %20, 0
; └└└└
  %22 = zext i1 %21 to i8
  %23 = load {}*, {}** %7, align 8
  %24 = bitcast {}*** %tls_pgcstack to {}**
  store {}* %23, {}** %24, align 8
  ret i8 %22
}

On this branch:

julia> @code_llvm MPI.Iprobe(MPI.COMM_WORLD)
;  @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe`
define i8 @julia_Iprobe_437({}* noundef nonnull align 4 dereferenceable(4) %0) #0 {
L18:
;  @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `Iprobe` @ /home/vchuravy/src/MPI/src/nonblocking.jl:395
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:398 within `Iprobe`
; ││┌ @ refpointer.jl:138 within `Ref`
; │││┌ @ refvalue.jl:7 within `RefValue`
      %1 = alloca i32, align 8
      %2 = bitcast i32* %1 to i8*
; └└└└
; ┌ @ refvalue.jl:59 within `getindex`
; │┌ @ Base.jl:37 within `getproperty`
    %3 = load i32, i32* inttoptr (i64 140538370873152 to i32*), align 64
    %4 = load i32, i32* inttoptr (i64 140538370873056 to i32*), align 32
    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2)
; └└
; ┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:395 within `#Iprobe#87`
; │┌ @ /home/vchuravy/src/MPI/src/nonblocking.jl:399 within `Iprobe`
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/api/api.jl:143 within `macro expansion`
; ││││┌ @ /home/vchuravy/src/MPI/src/comm.jl:11 within `unsafe_convert`
; │││││┌ @ Base.jl:37 within `getproperty`
        %5 = bitcast {}* %0 to i32*
        %6 = load i32, i32* %5, align 4
; ││└└└└
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
      %7 = load i64, i64* inttoptr (i64 140538370852448 to i64*), align 32
; ││└└
; ││┌ @ /home/vchuravy/src/MPI/src/api/generated_api.jl:2124 within `MPI_Iprobe`
; │││┌ @ /home/vchuravy/src/MPI/src/api/api.jl:143 within `macro expansion`
; ││││┌ @ refvalue.jl:53 within `unsafe_convert`
; │││││┌ @ pointer.jl:30 within `convert`
        %bitcast_coercion2 = ptrtoint i32* %1 to i64
; ││││└└
      %8 = call i32 inttoptr (i64 140538377550288 to i32 (i32, i32, i32, i64, i64)*)(i32 %3, i32 %4, i32 %6, i64 %bitcast_coercion2, i64 %7)
; ││││ @ /home/vchuravy/src/MPI/src/api/api.jl:144 within `macro expansion`
; ││││┌ @ operators.jl:276 within `!=`
; │││││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
        %.not = icmp eq i32 %8, 0
; ││││└└
      br i1 %.not, label %L29, label %L26

L26:                                              ; preds = %L18
      call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
; ││││ @ /home/vchuravy/src/MPI/src/api/api.jl:145 within `macro expansion`
      call void @j_mpi_error_439(i32 signext %8) #4
      unreachable

L29:                                              ; preds = %L18
; ││└└
; ││ @ /home/vchuravy/src/MPI/src/nonblocking.jl:400 within `Iprobe`
; ││┌ @ refvalue.jl:59 within `getindex`
; │││┌ @ Base.jl:37 within `getproperty`
      %9 = load i32, i32* %1, align 8
; ││└└
; ││┌ @ operators.jl:276 within `!=`
; │││┌ @ promotion.jl:461 within `==` @ promotion.jl:521
      %10 = icmp ne i32 %9, 0
; └└└└
  %11 = zext i1 %10 to i8
  ret i8 %11
}

@vchuravy
Copy link
Copy Markdown
Member Author

I have to correct myself: we are not reducing allocations (at least not in the example I show), but rather avoiding the unnecessary setup of a GC frame.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants