From d256629bf9e194838d1837be74dcc0b8ff0bcfd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Mon, 16 Feb 2026 11:59:50 +0100 Subject: [PATCH 1/4] [Bug #19831] Remove duplicate library warning When building on macOS, we get a warning about duplicate libraries ld: warning: ignoring duplicate libraries: '-ldl', '-lobjc', '-lpthread' To fix it, we now append $(MAINLIBS) to LIBRUBYARG_SHARED (when shared is enabled), matching what's already done for LIBRUBYARG_STATIC. And we remove the now-redundant $(MAINLIBS) from the $(PROGRAM) link line, since $(LIBRUBYARG) carries it in both cases. --- configure.ac | 3 +++ template/Makefile.in | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 05e2e9aca26ddf..c426931b396f21 100644 --- a/configure.ac +++ b/configure.ac @@ -4342,6 +4342,9 @@ AS_IF([test -n "${LIBS}"], [ MAINFLAGS=`echo " $MAINLIBS " | sed "s|$libspat"'||;s/^ *//;s/ *$//'` ]) LIBRUBYARG_STATIC="${LIBRUBYARG_STATIC} \$(MAINLIBS)" +AS_IF([test "$enable_shared" = yes], [ + LIBRUBYARG_SHARED="${LIBRUBYARG_SHARED} \$(MAINLIBS)" +]) CPPFLAGS="$CPPFLAGS "'$(DEFS) ${cppflags}' AS_IF([test -n "${cflags+set}"], [ cflagspat=`eval echo '"'"${cflags}"'"' | sed 's/[[][|.*]]/\\&/g;s/^ */ /;s/^ *$/ /'` diff --git a/template/Makefile.in b/template/Makefile.in index 8e93efc310cd97..e8b6eef6090e81 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -314,7 +314,7 @@ miniruby$(EXEEXT): $(PROGRAM): @$(RM) $@ $(ECHO) linking $@ - $(Q) $(PURIFY) $(CC) $(EXE_LDFLAGS) $(XLDFLAGS) $(MAINOBJ) $(EXTOBJS) $(LIBRUBYARG) $(MAINLIBS) $(EXTLIBS) $(OUTFLAG)$@ + $(Q) $(PURIFY) $(CC) $(EXE_LDFLAGS) $(XLDFLAGS) $(MAINOBJ) $(EXTOBJS) $(LIBRUBYARG) $(EXTLIBS) $(OUTFLAG)$@ $(Q) $(POSTLINK) $(PROGRAM): @XRUBY_LIBPATHENV_WRAPPER@ From ea7d3eb89b8c5192c0cf9a885a17b0aeaef1067a Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Mon, 9 Feb 2026 16:46:50 -0500 Subject: [PATCH 2/4] YJIT: Fix always-failing guard for `super()` in 
BMETHODs Previously, when dealing with a `super()` nested in a block that runs as a method (through e.g. `define_method`), YJIT generated a guard that never passes, leading to a misidentification of the callsite as megamorphic and an unconditional interpreter fallback. The issue was in the subroutine to find the currently running method entry. In the interpreter, this is rb_vm_frame_method_entry(). YJIT used `gen_get_lep()` to find the EP with `VM_ENV_FLAG_LOCAL`, but in case of BMETHODs, the corresponding CME is never at an EP level with `VM_ENV_FLAG_LOCAL` set. Because each block nesting level can dynamically run as either a BMETHOD or not, starting at a block and finding the first EP that has a method entry ultimately requires a search loop such as the one in rb_vm_frame_method_entry(). This patch introduces such a loop. Because `invokesuper` in a block can now work end-to-end, add a check for the previously masked "implicit argument passing of super from method defined by define_method() is not supported..." condition. --- bootstraptest/test_yjit.rb | 26 +++++++++ test/ruby/test_yjit.rb | 34 +++++++++++ yjit/src/codegen.rs | 113 ++++++++++++++++++++++++++++--------- yjit/src/stats.rs | 1 + 4 files changed, 146 insertions(+), 28 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index be66395190b273..cc7d9f1aeb3dc3 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -2483,6 +2483,32 @@ def foo B.new.foo } +# invokesuper in a weird block +assert_equal '["block->A#itself", "block->singleton#itself"]', %q{ + # This test runs the same block first as a block and then as a method, + # testing the routine that finds the currently running method, which is + # relevant for `super`. 
+ class BlockIseqDuality + prepend(Module.new do + def itself + nested = -> { "block->" + super() } + @singleton_itself.define_singleton_method(:itself, &nested) + nested + end + end) + + attr_reader :singleton_itself + def initialize = (@singleton_itself = "singleton#itself") + + def itself = "A#itself" + end + + tester = BlockIseqDuality.new + super_lambda = tester.itself + super_lambda.call # warmup + [super_lambda.call, tester.singleton_itself.itself] +} + # invokesuper zsuper in a bmethod assert_equal 'ok', %q{ class Foo diff --git a/test/ruby/test_yjit.rb b/test/ruby/test_yjit.rb index d6b9b75648e744..2670db181c7795 100644 --- a/test/ruby/test_yjit.rb +++ b/test/ruby/test_yjit.rb @@ -973,6 +973,40 @@ def to_s RUBY end + def test_super_bmethod + # Bmethod defined at class scope + assert_compiles(<<~'RUBY', insns: %i[invokesuper], result: true, exits: {}) + class SuperItself + define_method(:itself) { super() } + end + + obj = SuperItself.new + obj.itself + obj.itself == obj + RUBY + + # Bmethod defined inside a method (the block's local_iseq is ISEQ_TYPE_METHOD + # but the CME is at the bmethod frame, not the enclosing method's frame) + assert_compiles(<<~'RUBY', insns: %i[invokesuper], result: "Base#foo via bmethod", exits: {}) + class Base + def foo = "Base#foo" + end + + class SetupHelper + def add_bmethod_to(klass) + klass.define_method(:foo) { super() + " via bmethod" } + end + end + + class Target < Base; end + + SetupHelper.new.add_bmethod_to(Target) + obj = Target.new + obj.foo + obj.foo + RUBY + end + # Tests calling a variadic cfunc with many args def test_build_large_struct assert_compiles(<<~RUBY, insns: %i[opt_send_without_block], call_threshold: 2) diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 0a4832d0949b56..2b9d3ca2560f3f 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2425,22 +2425,83 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd { ep_opnd } -// Gets the EP of the ISeq of the containing method, or 
"local level". -// Equivalent of GET_LEP() macro. +/// Get the EP of the ISeq of the containing method, or "local level EP". +/// Equivalent to `GET_LEP()` with a constraint: +/// `ISEQ_TYPE_METHOD == iseq_under_compilation->body->local_iseq->body->type`. +/// No bad memory access happens when this condition is not met, just that the +/// EP returned may not be `VM_ENV_FLAG_LOCAL`. Practically, ISeqs that don't +/// meet this condition, such as `ISEQ_TYPE_TOP`, also don't need to use this operation. fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd { - // Equivalent of get_lvar_level() in compile.c - fn get_lvar_level(iseq: IseqPtr) -> u32 { - if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } { - 0 - } else { - 1 + get_lvar_level(unsafe { rb_get_iseq_body_parent_iseq(iseq) }) - } - } - - let level = get_lvar_level(jit.get_iseq()); + // GET_LEP() chases the parent environment pointer to reach the local environment. Inside + // a descendant of ISEQ_TYPE_METHOD, it chases the same number of times as chasing + // the parent iseq pointer to reach the local iseq. See get_lvar_level() in compile.c + let mut iseq = jit.get_iseq(); + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(iseq) }; + let mut level = 0; + while iseq != local_iseq { + iseq = unsafe { rb_get_iseq_body_parent_iseq(iseq) }; + level += 1; + } + asm_comment!(asm, "get_lep(level: {level})"); gen_get_ep(asm, level) } +/// Load the value in the ME_CREF slot of the EP that contains the running CME. +/// Returns the slot value directly, which is only useful for guarding that the +/// CME has not changed. Unlike rb_vm_frame_method_entry(), we never dereference +/// `ep[VM_ENV_DATA_INDEX_ME_CREF]` but rather rely on `ep[VM_ENV_DATA_INDEX_FLAGS]` +/// to terminate the search, since we load the flags anyways for EP hopping. +/// When `ep[VM_ENV_DATA_INDEX_ME_CREF]` is not a CME, it can't match the expected +/// CME, and the guard fails. 
+fn gen_get_running_cme_or_sentinal(jit: &JITState, asm: &mut Assembler) -> Opnd { + // When not in a block, the running CME is at `cfp->ep`. + if jit.iseq == unsafe { rb_get_iseq_body_local_iseq(jit.iseq) } { + asm_comment!(asm, "cfp->ep[VM_ENV_DATA_INDEX_ME_CREF]"); + let lep_opnd = gen_get_ep(asm, 0); + Opnd::mem( + VALUE_BITS, + lep_opnd, + SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, + ) + } else { + // Loop through EPs to find the one containing the CME. + // Stop when we find an EP with VM_FRAME_FLAG_BMETHOD (bmethod frame with CME) + // or VM_ENV_FLAG_LOCAL (local frame which is METHOD/CFUNC/IFUNC with CME). + // + // We cannot unroll to a static hop count like gen_get_lep() because bmethods + // defined inside methods may have a CME that lives at an EP closer to the + // starting EP than at the local and final EP level. Each level of nesting can + // dynamically run with and without VM_FRAME_FLAG_BMETHOD set. + asm_comment!(asm, "search for running cme"); + let loop_label = asm.new_label("cme_loop"); + let done_label = asm.new_label("cme_done"); + + let ep_opnd = Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_EP); + let ep_opnd = asm.load(ep_opnd); + + asm.write_label(loop_label); + // Load flags from ep[VM_ENV_DATA_INDEX_FLAGS] + let flags = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32))); + // Check if VM_FRAME_FLAG_BMETHOD or VM_ENV_FLAG_LOCAL is set. + // If either is set, this EP contains the CME. 
+ let check_flags = (VM_FRAME_FLAG_BMETHOD | VM_ENV_FLAG_LOCAL).as_usize(); + asm.test(flags, check_flags.into()); + asm.jnz(done_label); + // Get the previous EP from the current EP + // See GET_PREV_EP(ep) macro + // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03)) + let offs = SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL; + let next_ep_opnd = asm.load(Opnd::mem(VALUE_BITS, ep_opnd, offs)); + let next_ep_opnd = asm.and(next_ep_opnd, (!0x03_i64).into()); + asm.load_into(ep_opnd, next_ep_opnd); + asm.jmp(loop_label); + asm.write_label(done_label); + + // Load and return the CME from ep[VM_ENV_DATA_INDEX_ME_CREF] + asm.load(Opnd::mem(VALUE_BITS, ep_opnd, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF)) + } +} + fn gen_getlocal_generic( jit: &mut JITState, asm: &mut Assembler, @@ -9832,6 +9893,15 @@ fn gen_invokesuper_specialized( return None; } + let ci = unsafe { get_call_data_ci(cd) }; + let ci_flags = unsafe { vm_ci_flag(ci) }; + + // Bail on ZSUPER inside a block method. They always raise. + if ci_flags & VM_CALL_ZSUPER != 0 && VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(me) } { + gen_counter_incr(jit, asm, Counter::invokesuper_bmethod_zsuper); + return None; + } + // FIXME: We should track and invalidate this block when this cme is invalidated let current_defined_class = unsafe { (*me).defined_class }; let mid = unsafe { get_def_original_id((*me).def) }; @@ -9847,14 +9917,8 @@ fn gen_invokesuper_specialized( let comptime_superclass = unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) }; - let ci = unsafe { get_call_data_ci(cd) }; - let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); - - let ci_flags = unsafe { vm_ci_flag(ci) }; - // Don't JIT calls that aren't simple // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block. 
- if ci_flags & VM_CALL_KWARG != 0 { gen_counter_incr(jit, asm, Counter::invokesuper_kwarg); return None; @@ -9873,6 +9937,7 @@ fn gen_invokesuper_specialized( // cheaper calculations first, but since we specialize on the method entry // and so only have to do this once at compile time this is fine to always // check and side exit. + let argc: i32 = unsafe { vm_ci_argc(ci) }.try_into().unwrap(); let comptime_recv = jit.peek_at_stack(&asm.ctx, argc as isize); if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) { gen_counter_incr(jit, asm, Counter::invokesuper_defined_class_mismatch); @@ -9900,16 +9965,8 @@ fn gen_invokesuper_specialized( return None; } - asm_comment!(asm, "guard known me"); - let lep_opnd = gen_get_lep(jit, asm); - let ep_me_opnd = Opnd::mem( - 64, - lep_opnd, - SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_ME_CREF, - ); - - let me_as_value = VALUE(me as usize); - asm.cmp(ep_me_opnd, me_as_value.into()); + let cme_opnd = gen_get_running_cme_or_sentinal(jit, asm); + asm.cmp(cme_opnd, VALUE::from(me).into()); jit_chain_guard( JCC_JNE, jit, diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 105def2fff8577..0b0926262fdce7 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -402,6 +402,7 @@ make_counters! { invokesuper_megamorphic, invokesuper_no_cme, invokesuper_no_me, + invokesuper_bmethod_zsuper, invokesuper_not_iseq_or_cfunc, invokesuper_refinement, invokesuper_singleton_class, From 219ad6823a4d0020f49aed9f562e19b40ca4c40e Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 10 Feb 2026 17:30:06 -0500 Subject: [PATCH 3/4] ZJIT: Don't specialize `super` from within a block As we found out from YJIT, `super` from within a block needs a loop for the running CME guard, and the current LEP based guard always fails. Don't specialize for now so we use the fallback instead of side-exiting. 
--- zjit/src/hir.rs | 15 +++++++++++++-- zjit/src/hir/opt_tests.rs | 23 +++++++++++++++++++++++ zjit/src/stats.rs | 2 ++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 1888d00a98e5e6..91938a95fb5e60 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -655,6 +655,9 @@ pub enum SendFallbackReason { SingletonClassSeen, /// The super call is passed a block that the optimizer does not support. SuperCallWithBlock, + /// When the `super` is in a block, finding the running CME for guarding requires a loop. Not + /// supported for now. + SuperFromBlock, /// The profiled super class cannot be found. SuperClassNotFound, /// The `super` call uses a complex argument pattern that the optimizer does not support. @@ -707,6 +710,7 @@ impl Display for SendFallbackReason { ComplexArgPass => write!(f, "Complex argument passing"), UnexpectedKeywordArgs => write!(f, "Unexpected Keyword Args"), SingletonClassSeen => write!(f, "Singleton class previously created for receiver class"), + SuperFromBlock => write!(f, "super: call from within a block"), SuperCallWithBlock => write!(f, "super: call made with a block"), SuperClassNotFound => write!(f, "super: profiled class cannot be found"), SuperComplexArgsPass => write!(f, "super: complex argument passing to `super` call"), @@ -3439,6 +3443,15 @@ impl Function { continue; } + let frame_state = self.frame_state(state); + + // Don't handle super in a block since that needs a loop to find the running CME. + if frame_state.iseq != unsafe { rb_get_iseq_body_local_iseq(frame_state.iseq) } { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperFromBlock); + continue; + } + let ci = unsafe { get_call_data_ci(cd) }; let flags = unsafe { rb_vm_ci_flag(ci) }; assert!(flags & VM_CALL_FCALL != 0); @@ -3450,8 +3463,6 @@ impl Function { continue; } - let frame_state = self.frame_state(state); - // Get the profiled CME from the current method. 
let Some(profiles) = self.profiles.as_ref() else { self.push_insn_id(block, insn_id); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 13155556947b8f..db485e12554770 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -11341,6 +11341,29 @@ mod hir_opt_tests { "); } + #[test] + fn test_invokesuper_from_a_block() { + _ = eval(" + define_method(:itself) { super() } + itself + "); + + assert_snapshot!(hir_string("itself"), @" + fn block in @:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: call from within a block + CheckInterrupts + Return v11 + "); + } + #[test] fn test_invokesuper_with_positional_args_optimizes_to_direct() { eval(" diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index b9771aad0a7b03..2fb233bb0f4493 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -255,6 +255,7 @@ make_counters! 
{ send_fallback_send_cfunc_variadic, send_fallback_send_cfunc_array_variadic, send_fallback_super_call_with_block, + send_fallback_super_from_block, send_fallback_super_class_not_found, send_fallback_super_complex_args_pass, send_fallback_super_fallback_no_profile, @@ -633,6 +634,7 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter CCallWithFrameTooManyArgs => send_fallback_ccall_with_frame_too_many_args, ObjToStringNotString => send_fallback_obj_to_string_not_string, SuperCallWithBlock => send_fallback_super_call_with_block, + SuperFromBlock => send_fallback_super_from_block, SuperClassNotFound => send_fallback_super_class_not_found, SuperComplexArgsPass => send_fallback_super_complex_args_pass, SuperNoProfiles => send_fallback_super_fallback_no_profile, From 6273827196cd872549e4e49411230c82d1f86547 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Mon, 16 Feb 2026 20:04:16 -0800 Subject: [PATCH 4/4] Remove key_addr argument to set{,_table}_insert_wb After a review of all callers, this doesn't appear to be needed for correct behavior. We could potentially have a future case where it is needed, such as a method that inserts a record into the set and returns the inserted record (which could be different from the argument if the argument was an unfrozen string). However, we don't currently have such a case, and it's better for performance to remove the code. 
--- set.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/set.c b/set.c index ea04a8495a1a56..6bfded02a414ee 100644 --- a/set.c +++ b/set.c @@ -362,11 +362,10 @@ set_compact_after_delete(VALUE set) } static int -set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) +set_table_insert_wb(set_table *tab, VALUE set, VALUE key) { if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) { key = rb_hash_key_str(key); - if (key_addr) *key_addr = key; } int ret = set_insert(tab, (st_data_t)key); if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key); @@ -374,9 +373,9 @@ set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) } static int -set_insert_wb(VALUE set, VALUE key, VALUE *key_addr) +set_insert_wb(VALUE set, VALUE key) { - return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr); + return set_table_insert_wb(RSET_TABLE(set), set, key); } static VALUE @@ -413,7 +412,7 @@ set_s_create(int argc, VALUE *argv, VALUE klass) int i; for (i=0; i < argc; i++) { - set_table_insert_wb(table, set, argv[i], NULL); + set_table_insert_wb(table, set, argv[i]); } return set; @@ -464,7 +463,7 @@ static VALUE set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) { VALUE element = i; - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } @@ -472,7 +471,7 @@ static VALUE set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) { VALUE element = rb_yield(i); - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } @@ -513,7 +512,7 @@ set_i_initialize(int argc, VALUE *argv, VALUE set) for (i=0; iother, key)) { - set_table_insert_wb(data->into, data->set, key, NULL); + set_table_insert_wb(data->into, data->set, key); } return ST_CONTINUE; @@ -1098,7 +1097,7 @@ static int set_merge_i(st_data_t key, st_data_t data) { struct set_merge_args *args = (struct set_merge_args *)data; - 
set_table_insert_wb(args->into, args->set, key, NULL); + set_table_insert_wb(args->into, args->set, key); return ST_CONTINUE; } @@ -1106,7 +1105,7 @@ static VALUE set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) { VALUE element = key; - set_insert_wb(set, element, &element); + set_insert_wb(set, element); return element; } @@ -1124,7 +1123,7 @@ set_merge_enum_into(VALUE set, VALUE arg) long i; set_table *into = RSET_TABLE(set); for (i=0; i