From c21f3490d1f28b43564639ae8563bc2e02e828a4 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Wed, 14 Jan 2026 21:51:49 +0000 Subject: [PATCH 01/77] Implement a fast path for sweeping (gc_sweep_fast_path_p). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Feature #21846] There is a single path through our GC Sweeping code, and we always call rb_gc_obj_free_vm_weak_references and rb_gc_obj_free before adding the object back to the freelist. We do this even when the object has no external resources that require being freed and has no weak references pointing to it. This commit introduces a conservative fast path through gc_sweep_plane that uses the object flags to identify certain cases where these calls can be skipped - for these objects we just add them straight back on the freelist. Any object for which gc_sweep_fast_path_p returns false will use the current full sweep code (referred to here as the slow path). Currently there are 2 checks that will _always_ require an object to go down the slow path: 1. Has its object_id been observed and stored in the id2ref_table 2. 
Has it got generic ivars in the gen_fields table If neither of these are true, then we run some flag checks on the object and send the following cases down the fast path: - Objects that are not heap allocated - Embedded strings that aren't in the fstring table - Embedded Arrays - Embedded Hashes - Embedded Bignums - Embedded Strings - Floats, Rationals and Complex - Various IMEMO subtypes that do no allocation We've benchmarked this code using ruby-bench as well as the gcbench benchmarks inside Ruby (benchmarks/gc) and this patch results in a modest speed improvement on almost all of the headline benchmarks (2% in railsbench with YJIT enabled), and an observable 30% improvement in time spent sweeping during the GC benchmarks: ``` master: ruby 4.1.0dev (2026-01-19T12:03:33Z master 859920dfd2) +YJIT +PRISM [x86_64-linux] experiment: ruby 4.1.0dev (2026-01-16T21:36:46Z mvh-sweep-fast-pat.. c3ffe377a1) +YJIT +PRISM [x86_64-linux] -------------- ----------- ---------- --------------- ---------- ------------------ ----------------- bench master (ms) stddev (%) experiment (ms) stddev (%) experiment 1st itr master/experiment lobsters N/A N/A N/A N/A N/A N/A activerecord 132.5 0.9 132.5 1.0 1.056 1.001 chunky-png 577.2 0.4 580.1 0.4 0.994 0.995 erubi-rails 902.9 0.2 894.3 0.2 1.040 1.010 hexapdf 1763.9 3.3 1760.6 3.7 1.027 1.002 liquid-c 56.9 0.6 56.7 1.4 1.004 1.003 liquid-compile 46.3 2.1 46.1 2.1 1.005 1.004 liquid-render 77.8 0.8 75.1 0.9 1.023 1.036 mail 114.7 0.4 113.0 1.4 1.054 1.015 psych-load 1635.4 1.4 1625.9 0.5 0.988 1.006 railsbench 1685.4 2.4 1650.1 2.0 0.989 1.021 rubocop 133.5 8.1 130.3 7.8 1.002 1.024 ruby-lsp 140.3 1.9 137.5 1.8 1.007 1.020 sequel 64.6 0.7 63.9 0.7 1.003 1.011 shipit 1196.2 4.3 1181.5 4.2 1.003 1.012 -------------- ----------- ---------- --------------- ---------- ------------------ ----------------- Legend: - experiment 1st itr: ratio of master/experiment time for the first benchmarking iteration. 
- master/experiment: ratio of master/experiment time. Higher is better for experiment. Above 1 represents a speedup. ``` ``` Benchmark │ Wall(B) Sweep(B) Mark(B) │ Wall(E) Sweep(E) Mark(E) │ Wall Δ Sweep Δ ───────────────┼─────────────────────────────────┼─────────────────────────────────┼────────────────── null │ 0.000s 1ms 4ms │ 0.000s 1ms 4ms │ 0% 0% hash1 │ 4.330s 875ms 46ms │ 3.960s 531ms 44ms │ +8.6% +39.3% hash2 │ 6.356s 243ms 988ms │ 6.298s 176ms 1.03s │ +0.9% +27.6% rdoc │ 37.337s 2.42s 1.09s │ 36.678s 2.11s 1.20s │ +1.8% +13.1% binary_trees │ 3.366s 426ms 252ms │ 3.082s 275ms 239ms │ +8.4% +35.4% ring │ 5.252s 14ms 2.47s │ 5.327s 12ms 2.43s │ -1.4% +14.3% redblack │ 2.966s 28ms 41ms │ 2.940s 21ms 38ms │ +0.9% +25.0% ───────────────┼─────────────────────────────────┼─────────────────────────────────┼────────────────── Legend: (B) = Baseline, (E) = Experiment, Δ = improvement (positive = faster) Wall = total wallclock, Sweep = GC sweeping time, Mark = GC marking time Times are median of 3 runs ``` These results are also borne out when YJIT is disabled: ``` master: ruby 4.1.0dev (2026-01-19T12:03:33Z master 859920dfd2) +PRISM [x86_64-linux] experiment: ruby 4.1.0dev (2026-01-16T21:36:46Z mvh-sweep-fast-pat.. 
c3ffe377a1) +PRISM [x86_64-linux] -------------- ----------- ---------- --------------- ---------- ------------------ ----------------- bench master (ms) stddev (%) experiment (ms) stddev (%) experiment 1st itr master/experiment lobsters N/A N/A N/A N/A N/A N/A activerecord 389.6 0.3 377.5 0.3 1.032 1.032 chunky-png 1123.4 0.2 1109.2 0.2 1.013 1.013 erubi-rails 1754.3 0.1 1725.7 0.1 1.035 1.017 hexapdf 3346.5 0.9 3326.9 0.7 1.003 1.006 liquid-c 84.0 0.5 83.5 0.5 0.992 1.006 liquid-compile 74.0 1.5 73.5 1.4 1.011 1.008 liquid-render 199.9 0.4 199.6 0.4 1.000 1.002 mail 177.8 0.4 176.4 0.4 1.069 1.008 psych-load 2749.6 0.7 2777.0 0.0 0.980 0.990 railsbench 2983.0 1.0 2965.5 0.8 1.041 1.006 rubocop 228.8 1.0 227.5 1.2 1.015 1.005 ruby-lsp 221.8 0.9 216.1 0.8 1.011 1.026 sequel 89.1 0.5 89.1 1.8 1.005 1.000 shipit 2385.6 1.6 2371.8 1.0 1.002 1.006 -------------- ----------- ---------- --------------- ---------- ------------------ ----------------- Legend: - experiment 1st itr: ratio of master/experiment time for the first benchmarking iteration. - master/experiment: ratio of master/experiment time. Higher is better for experiment. Above 1 represents a speedup. 
``` ``` Benchmark │ Wall(B) Sweep(B) Mark(B) │ Wall(E) Sweep(E) Mark(E) │ Wall Δ Sweep Δ ───────────────┼─────────────────────────────────┼─────────────────────────────────┼────────────────── null │ 0.000s 1ms 4ms │ 0.000s 1ms 3ms │ 0% 0% hash1 │ 4.349s 877ms 45ms │ 4.045s 532ms 44ms │ +7.0% +39.3% hash2 │ 6.575s 235ms 967ms │ 6.540s 181ms 1.04s │ +0.5% +23.0% rdoc │ 45.782s 2.23s 1.14s │ 44.925s 1.90s 1.01s │ +1.9% +15.0% binary_trees │ 6.433s 426ms 252ms │ 6.268s 278ms 240ms │ +2.6% +34.7% ring │ 6.584s 17ms 2.33s │ 6.738s 13ms 2.33s │ -2.3% +30.8% redblack │ 13.334s 31ms 42ms │ 13.296s 24ms 107ms │ +0.3% +22.6% ───────────────┼─────────────────────────────────┼─────────────────────────────────┼────────────────── Legend: (B) = Baseline, (E) = Experiment, Δ = improvement (positive = faster) Wall = total wallclock, Sweep = GC sweeping time, Mark = GC marking time Times are median of 3 runs ``` --- gc.c | 2 +- gc/default/default.c | 163 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 133 insertions(+), 32 deletions(-) diff --git a/gc.c b/gc.c index ab0539cd3358a3..b07bcefda2a3d5 100644 --- a/gc.c +++ b/gc.c @@ -596,6 +596,7 @@ rb_gc_guarded_ptr_val(volatile VALUE *ptr, VALUE val) #endif static const char *obj_type_name(VALUE obj); +static st_table *id2ref_tbl; #include "gc/default/default.c" #if USE_MODULAR_GC && !defined(HAVE_DLOPEN) @@ -1831,7 +1832,6 @@ rb_gc_pointer_to_heap_p(VALUE obj) #define OBJ_ID_INCREMENT (RUBY_IMMEDIATE_MASK + 1) #define LAST_OBJECT_ID() (object_id_counter * OBJ_ID_INCREMENT) static VALUE id2ref_value = 0; -static st_table *id2ref_tbl = NULL; #if SIZEOF_SIZE_T == SIZEOF_LONG_LONG static size_t object_id_counter = 1; diff --git a/gc/default/default.c b/gc/default/default.c index 013c0749946e2d..ff43e38ab9afd4 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -843,6 +843,93 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_HEAP_WB_UNPROTECTED_BITS(x) 
(&GET_HEAP_PAGE(x)->wb_unprotected_bits[0]) #define GET_HEAP_MARKING_BITS(x) (&GET_HEAP_PAGE(x)->marking_bits[0]) + +#ifndef BUILDING_MODULAR_GC +static inline bool +gc_sweep_fast_path_p(VALUE obj) +{ + VALUE flags = RBASIC(obj)->flags; + + if (flags & FL_FINALIZE) return false; + + switch (flags & RUBY_T_MASK) { + case T_IMEMO: + switch (imemo_type(obj)) { + case imemo_constcache: + case imemo_cref: + case imemo_ifunc: + case imemo_memo: + case imemo_svar: + case imemo_throw_data: + return true; + default: + return false; + } + + case T_DATA: + if (flags & RUBY_FL_USERPRIV0) { + uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; + if (type & TYPED_DATA_EMBEDDED) { + RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; + return (uintptr_t)dfree + 1 <= 1; + } + } + return false; + + case T_OBJECT: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_BIGNUM: + case T_STRUCT: + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + break; + + default: + return false; + } + + shape_id_t shape_id = RBASIC_SHAPE_ID(obj); + if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return false; + + switch (flags & RUBY_T_MASK) { + case T_OBJECT: + return !(flags & ROBJECT_HEAP); + + case T_STRING: + if (flags & (RSTRING_NOEMBED | RSTRING_FSTR)) return false; + return !rb_shape_has_fields(shape_id); + + case T_ARRAY: + if (!(flags & RARRAY_EMBED_FLAG)) return false; + return !rb_shape_has_fields(shape_id); + + case T_HASH: + if (flags & RHASH_ST_TABLE_FLAG) return false; + return !rb_shape_has_fields(shape_id); + + case T_BIGNUM: + if (!(flags & BIGNUM_EMBED_FLAG)) return false; + return !rb_shape_has_fields(shape_id); + + case T_STRUCT: + if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return false; + if (flags & RSTRUCT_GEN_FIELDS) return !rb_shape_has_fields(shape_id); + return true; + + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + return !rb_shape_has_fields(shape_id); + + default: + UNREACHABLE_RETURN(false); + } +} +#endif + 
#define RVALUE_AGE_BITMAP_INDEX(n) (NUM_IN_PAGE(n) / (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) #define RVALUE_AGE_BITMAP_OFFSET(n) ((NUM_IN_PAGE(n) % (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) * RVALUE_AGE_BIT_COUNT) @@ -3481,15 +3568,34 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit rb_asan_unpoison_object(vp, false); if (bitset & 1) { switch (BUILTIN_TYPE(vp)) { - default: /* majority case */ - gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); + case T_MOVED: + if (objspace->flags.during_compacting) { + /* The sweep cursor shouldn't have made it to any + * T_MOVED slots while the compact flag is enabled. + * The sweep cursor and compact cursor move in + * opposite directions, and when they meet references will + * get updated and "during_compacting" should get disabled */ + rb_bug("T_MOVED shouldn't be seen until compaction is finished"); + } + gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + ctx->empty_slots++; + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp); + break; + case T_ZOMBIE: + /* already counted */ + break; + case T_NONE: + ctx->empty_slots++; /* already freed */ + break; + + default: #if RGENGC_CHECK_MODE if (!is_full_marking(objspace)) { if (RVALUE_OLD_P(objspace, vp)) rb_bug("page_sweep: %p - old while minor GC.", (void *)p); if (RVALUE_REMEMBERED(objspace, vp)) rb_bug("page_sweep: %p - remembered.", (void *)p); } #endif - if (RVALUE_WB_UNPROTECTED(objspace, vp)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(vp), vp); #if RGENGC_CHECK_MODE @@ -3501,42 +3607,37 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit #undef CHECK #endif - rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); +#ifndef BUILDING_MODULAR_GC + if (gc_sweep_fast_path_p(vp)) { + if (UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { + rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); + } - 
rb_gc_obj_free_vm_weak_references(vp); - if (rb_gc_obj_free(objspace, vp)) { - // always add free slots back to the swept pages freelist, - // so that if we're compacting, we can re-use the slots (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, BASE_SLOT_SIZE); RVALUE_AGE_SET_BITMAP(vp, 0); heap_page_add_freeobj(objspace, sweep_page, vp); - gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); ctx->freed_slots++; } - else { - ctx->final_slots++; - } - break; + else +#endif + { + gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); - case T_MOVED: - if (objspace->flags.during_compacting) { - /* The sweep cursor shouldn't have made it to any - * T_MOVED slots while the compact flag is enabled. - * The sweep cursor and compact cursor move in - * opposite directions, and when they meet references will - * get updated and "during_compacting" should get disabled */ - rb_bug("T_MOVED shouldn't be seen until compaction is finished"); + rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); + + rb_gc_obj_free_vm_weak_references(vp); + if (rb_gc_obj_free(objspace, vp)) { + (void)VALGRIND_MAKE_MEM_UNDEFINED((void*)p, BASE_SLOT_SIZE); + RVALUE_AGE_SET_BITMAP(vp, 0); + heap_page_add_freeobj(objspace, sweep_page, vp); + gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); + ctx->freed_slots++; + } + else { + ctx->final_slots++; + } } - gc_report(3, objspace, "page_sweep: %s is added to freelist\n", rb_obj_info(vp)); - ctx->empty_slots++; - RVALUE_AGE_SET_BITMAP(vp, 0); - heap_page_add_freeobj(objspace, sweep_page, vp); - break; - case T_ZOMBIE: - /* already counted */ - break; - case T_NONE: - ctx->empty_slots++; /* already freed */ break; } } From 211714f1bfd8d0927c704713545e37f18cc75229 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 20 Jan 2026 18:49:03 +0000 Subject: [PATCH 02/77] Clarify the use of some FLAGS --- 
gc/default/default.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index ff43e38ab9afd4..b4c6b9819a3d4f 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -867,10 +867,13 @@ gc_sweep_fast_path_p(VALUE obj) } case T_DATA: - if (flags & RUBY_FL_USERPRIV0) { + if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; if (type & TYPED_DATA_EMBEDDED) { RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; + // Fast path for embedded T_DATA with no custom free function. + // True when dfree is NULL (RUBY_NEVER_FREE) or -1 (RUBY_TYPED_DEFAULT_FREE). + // Single comparison used instead of two equality checks for performance. return (uintptr_t)dfree + 1 <= 1; } } From efde37b7122b23775b906ad90cf4e88e05b756a8 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Thu, 22 Jan 2026 22:28:24 +0000 Subject: [PATCH 03/77] Move the gc fast path out of the default GC impl It relies too much on VM level concerns, such that it can't be built with modular GC enabled. We'll move it into the VM, and then expose it to the GC implementations so they can use it. --- gc.c | 100 +++++++++++++++++++++++++++++++++++++++++++ gc/default/default.c | 92 ++------------------------------------- gc/gc.h | 1 + 3 files changed, 104 insertions(+), 89 deletions(-) diff --git a/gc.c b/gc.c index b07bcefda2a3d5..f8d19fb072289c 100644 --- a/gc.c +++ b/gc.c @@ -1243,6 +1243,106 @@ rb_gc_handle_weak_references(VALUE obj) } } +/* + * Returns true if the object requires a full rb_gc_obj_free() call during sweep, + * false if it can be freed quickly without calling destructors or cleanup. 
+ * + * Objects that return false are: + * - Simple embedded objects without external allocations + * - Objects without finalizers + * - Objects without object IDs registered in id2ref + * - Objects without generic instance variables + * + * This is used by the GC sweep fast path to avoid function call overhead + * for the majority of simple objects. + */ +bool +rb_gc_obj_free_on_sweep_p(VALUE obj) +{ + VALUE flags = RBASIC(obj)->flags; + + if (flags & FL_FINALIZE) return true; + + switch (flags & RUBY_T_MASK) { + case T_IMEMO: + switch (imemo_type(obj)) { + case imemo_constcache: + case imemo_cref: + case imemo_ifunc: + case imemo_memo: + case imemo_svar: + case imemo_throw_data: + return false; + default: + return true; + } + + case T_DATA: + if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { + uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; + if (type & TYPED_DATA_EMBEDDED) { + RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; + // Fast path for embedded T_DATA with no custom free function. + // True when dfree is NULL (RUBY_NEVER_FREE) or -1 (RUBY_TYPED_DEFAULT_FREE). 
+ if ((uintptr_t)dfree + 1 <= 1) return false; + } + } + return true; + + case T_OBJECT: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_BIGNUM: + case T_STRUCT: + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + break; + + default: + return true; + } + + shape_id_t shape_id = RBASIC_SHAPE_ID(obj); + if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return true; + + switch (flags & RUBY_T_MASK) { + case T_OBJECT: + if (flags & ROBJECT_HEAP) return true; + return false; + + case T_STRING: + if (flags & (RSTRING_NOEMBED | RSTRING_FSTR)) return true; + return rb_shape_has_fields(shape_id); + + case T_ARRAY: + if (!(flags & RARRAY_EMBED_FLAG)) return true; + return rb_shape_has_fields(shape_id); + + case T_HASH: + if (flags & RHASH_ST_TABLE_FLAG) return true; + return rb_shape_has_fields(shape_id); + + case T_BIGNUM: + if (!(flags & BIGNUM_EMBED_FLAG)) return true; + return rb_shape_has_fields(shape_id); + + case T_STRUCT: + if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return true; + if (flags & RSTRUCT_GEN_FIELDS) return rb_shape_has_fields(shape_id); + return false; + + case T_FLOAT: + case T_RATIONAL: + case T_COMPLEX: + return rb_shape_has_fields(shape_id); + + default: + UNREACHABLE_RETURN(true); + } +} + static void io_fptr_finalize(void *fptr) { diff --git a/gc/default/default.c b/gc/default/default.c index b4c6b9819a3d4f..0f2ecdfaeb4612 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -844,94 +844,11 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_HEAP_MARKING_BITS(x) (&GET_HEAP_PAGE(x)->marking_bits[0]) -#ifndef BUILDING_MODULAR_GC static inline bool gc_sweep_fast_path_p(VALUE obj) { - VALUE flags = RBASIC(obj)->flags; - - if (flags & FL_FINALIZE) return false; - - switch (flags & RUBY_T_MASK) { - case T_IMEMO: - switch (imemo_type(obj)) { - case imemo_constcache: - case imemo_cref: - case imemo_ifunc: - case imemo_memo: - case imemo_svar: - case imemo_throw_data: - return true; - default: 
- return false; - } - - case T_DATA: - if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { - uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; - if (type & TYPED_DATA_EMBEDDED) { - RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; - // Fast path for embedded T_DATA with no custom free function. - // True when dfree is NULL (RUBY_NEVER_FREE) or -1 (RUBY_TYPED_DEFAULT_FREE). - // Single comparison used instead of two equality checks for performance. - return (uintptr_t)dfree + 1 <= 1; - } - } - return false; - - case T_OBJECT: - case T_STRING: - case T_ARRAY: - case T_HASH: - case T_BIGNUM: - case T_STRUCT: - case T_FLOAT: - case T_RATIONAL: - case T_COMPLEX: - break; - - default: - return false; - } - - shape_id_t shape_id = RBASIC_SHAPE_ID(obj); - if (id2ref_tbl && rb_shape_has_object_id(shape_id)) return false; - - switch (flags & RUBY_T_MASK) { - case T_OBJECT: - return !(flags & ROBJECT_HEAP); - - case T_STRING: - if (flags & (RSTRING_NOEMBED | RSTRING_FSTR)) return false; - return !rb_shape_has_fields(shape_id); - - case T_ARRAY: - if (!(flags & RARRAY_EMBED_FLAG)) return false; - return !rb_shape_has_fields(shape_id); - - case T_HASH: - if (flags & RHASH_ST_TABLE_FLAG) return false; - return !rb_shape_has_fields(shape_id); - - case T_BIGNUM: - if (!(flags & BIGNUM_EMBED_FLAG)) return false; - return !rb_shape_has_fields(shape_id); - - case T_STRUCT: - if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return false; - if (flags & RSTRUCT_GEN_FIELDS) return !rb_shape_has_fields(shape_id); - return true; - - case T_FLOAT: - case T_RATIONAL: - case T_COMPLEX: - return !rb_shape_has_fields(shape_id); - - default: - UNREACHABLE_RETURN(false); - } + return !rb_gc_obj_free_on_sweep_p(obj); } -#endif #define RVALUE_AGE_BITMAP_INDEX(n) (NUM_IN_PAGE(n) / (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) #define RVALUE_AGE_BITMAP_OFFSET(n) ((NUM_IN_PAGE(n) % (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) * RVALUE_AGE_BIT_COUNT) @@ -3610,9 +3527,8 @@ 
gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit #undef CHECK #endif -#ifndef BUILDING_MODULAR_GC if (gc_sweep_fast_path_p(vp)) { - if (UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { + if (RB_UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } @@ -3622,9 +3538,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit gc_report(3, objspace, "page_sweep: %s (fast path) added to freelist\n", rb_obj_info(vp)); ctx->freed_slots++; } - else -#endif - { + else { gc_report(2, objspace, "page_sweep: free %p\n", (void *)p); rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); diff --git a/gc/gc.h b/gc/gc.h index 097ddb93949a0b..196e25cf05ffaa 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -100,6 +100,7 @@ MODULAR_GC_FN void rb_gc_after_updating_jit_code(void); MODULAR_GC_FN bool rb_gc_obj_shareable_p(VALUE); MODULAR_GC_FN void rb_gc_rp(VALUE); MODULAR_GC_FN void rb_gc_handle_weak_references(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_free_on_sweep_p(VALUE obj); #if USE_MODULAR_GC MODULAR_GC_FN bool rb_gc_event_hook_required_p(rb_event_flag_t event); From 8e73aa7ffe6f5504be9f1a90e5d9dcbe3ae8376a Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Fri, 23 Jan 2026 16:11:17 +0000 Subject: [PATCH 04/77] We don't need this wrapper function anymore --- gc/default/default.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 0f2ecdfaeb4612..c057809e4eaf3f 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -844,12 +844,6 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page * #define GET_HEAP_MARKING_BITS(x) (&GET_HEAP_PAGE(x)->marking_bits[0]) -static inline bool -gc_sweep_fast_path_p(VALUE obj) -{ - return !rb_gc_obj_free_on_sweep_p(obj); -} - #define RVALUE_AGE_BITMAP_INDEX(n) (NUM_IN_PAGE(n) / (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) 
#define RVALUE_AGE_BITMAP_OFFSET(n) ((NUM_IN_PAGE(n) % (BITS_BITLENGTH / RVALUE_AGE_BIT_COUNT)) * RVALUE_AGE_BIT_COUNT) @@ -3527,7 +3521,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit #undef CHECK #endif - if (gc_sweep_fast_path_p(vp)) { + if (!rb_gc_obj_free_on_sweep_p(vp)) { if (RB_UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } From 7444f415db75c1436e11b61a4ce2f461158d234c Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Fri, 23 Jan 2026 16:45:57 +0000 Subject: [PATCH 05/77] rename rb_gc_obj_free_on_sweep -> rb_gc_obj_needs_cleanup_p --- gc.c | 2 +- gc/default/default.c | 2 +- gc/gc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gc.c b/gc.c index f8d19fb072289c..d519a214178626 100644 --- a/gc.c +++ b/gc.c @@ -1257,7 +1257,7 @@ rb_gc_handle_weak_references(VALUE obj) * for the majority of simple objects. */ bool -rb_gc_obj_free_on_sweep_p(VALUE obj) +rb_gc_obj_needs_cleanup_p(VALUE obj) { VALUE flags = RBASIC(obj)->flags; diff --git a/gc/default/default.c b/gc/default/default.c index c057809e4eaf3f..5758fe188555d2 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -3521,7 +3521,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit #undef CHECK #endif - if (!rb_gc_obj_free_on_sweep_p(vp)) { + if (!rb_gc_obj_needs_cleanup_p(vp)) { if (RB_UNLIKELY(objspace->hook_events & RUBY_INTERNAL_EVENT_FREEOBJ)) { rb_gc_event_hook(vp, RUBY_INTERNAL_EVENT_FREEOBJ); } diff --git a/gc/gc.h b/gc/gc.h index 196e25cf05ffaa..5979b4a00193e2 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -100,7 +100,7 @@ MODULAR_GC_FN void rb_gc_after_updating_jit_code(void); MODULAR_GC_FN bool rb_gc_obj_shareable_p(VALUE); MODULAR_GC_FN void rb_gc_rp(VALUE); MODULAR_GC_FN void rb_gc_handle_weak_references(VALUE obj); -MODULAR_GC_FN bool rb_gc_obj_free_on_sweep_p(VALUE obj); +MODULAR_GC_FN bool rb_gc_obj_needs_cleanup_p(VALUE 
obj); #if USE_MODULAR_GC MODULAR_GC_FN bool rb_gc_event_hook_required_p(rb_event_flag_t event); From d15117e2937b78b0868e0f41336f6350bbf1a1c4 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 26 Jan 2026 10:48:48 +0000 Subject: [PATCH 06/77] BIGNUM can't have fields other than object_id --- gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc.c b/gc.c index d519a214178626..d1b504ed001e17 100644 --- a/gc.c +++ b/gc.c @@ -1326,7 +1326,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) case T_BIGNUM: if (!(flags & BIGNUM_EMBED_FLAG)) return true; - return rb_shape_has_fields(shape_id); + return false; case T_STRUCT: if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return true; From 3c634893e245c578181e8337b4025d1f673d77e8 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 26 Jan 2026 12:46:00 +0000 Subject: [PATCH 07/77] Remove the unnecessary integer comparison Most compilers will optimise this anyway --- gc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gc.c b/gc.c index d1b504ed001e17..f1c7f834d0f70c 100644 --- a/gc.c +++ b/gc.c @@ -1282,9 +1282,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; if (type & TYPED_DATA_EMBEDDED) { RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; - // Fast path for embedded T_DATA with no custom free function. - // True when dfree is NULL (RUBY_NEVER_FREE) or -1 (RUBY_TYPED_DEFAULT_FREE). - if ((uintptr_t)dfree + 1 <= 1) return false; + return (dfree == RUBY_NEVER_FREE || dfree == RUBY_TYPED_DEFAULT_FREE); } } return true; From 994257ab06072df38de024e70a60aa9a87e36089 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 26 Jan 2026 14:34:37 -0500 Subject: [PATCH 08/77] Prevent starvation when acquiring mutex over and over (#15877) Continually locking a mutex m can lead to starvation if all other threads are on the waitq of m. See https://bugs.ruby-lang.org/issues/21840 for more details. 
Solution: When a thread `T1` wakes up `T2` during mutex unlock but `T1` or any other thread successfully acquires it before `T2`, then we record the `running_time` of the thread during mutex acquisition. Then during unlock, if that thread's running_time is less than the saved running time, we set it back to the saved time. Fixes [Bug #21840] --- test/ruby/test_thread.rb | 33 +++++++++++++++++++++++++++++++++ thread_sync.c | 24 ++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index b2d8e73693807c..60e3aa772a8642 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1664,4 +1664,37 @@ def test_mn_threads_sub_millisecond_sleep assert_operator elapsed, :>=, 0.1, "sub-millisecond sleeps should not return immediately" end; end + + # [Bug #21840] + def test_mutex_owner_doesnt_starve_waiters + assert_ruby_status([], "#{<<~"begin;"}\n#{<<~'end;'}") + begin; + m = Mutex.new + + fib = lambda { |n| + return n if n <= 1 + fib(n - 1) + fib(n - 2) + } + + t1_running = false + t1 = Thread.new do + t1_running = true + loop do + fib(20) + m.synchronize do + File.open(__FILE__) { } # reset timeslice due to blocking operation + end + end + end + + loop until t1_running + + 3.times.map do + Thread.new do + m.synchronize do + end + end + end.each(&:join) + end; + end end diff --git a/thread_sync.c b/thread_sync.c index e3916c97cbd0a6..2963b6db73b123 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -10,6 +10,8 @@ typedef struct rb_mutex_struct { rb_thread_t *th; // even if the fiber is collected, we might need access to the thread in mutex_free struct rb_mutex_struct *next_mutex; struct ccan_list_head waitq; /* protected by GVL */ + uint32_t saved_running_time_us; + bool wait_waking; // Is there a thread waiting to be woken up by this mutex? Reset during every wakeup. 
} rb_mutex_t; /* sync_waiter is always on-stack */ @@ -212,8 +214,15 @@ mutex_locked(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) static inline bool do_mutex_trylock(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) { + // NOTE: we can successfully lock a mutex even if there are other threads waiting on it. First one to it wins. if (mutex->ec_serial == 0) { RUBY_DEBUG_LOG("%p ok", mutex); + if (mutex->wait_waking) { + // If we acquired `mutex` without contention and before the thread that was popped off the waitq, we're going + // to set our running_time back to what it was here during mutex unlock if it got reset during our critical + // section. This is to prevent starvation of other threads waiting on the mutex. + mutex->saved_running_time_us = th->running_time_us; + } mutex_locked(mutex, th, ec_serial); return true; @@ -350,7 +359,8 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p) } ccan_list_del(&sync_waiter.node); - // unlocked by another thread while sleeping + // If mutex->ec_serial != 0, the mutex was locked by another thread before we had the chance to acquire it. + // We'll put ourselves on the waitq and sleep again. 
if (!mutex->ec_serial) { mutex_set_owner(mutex, th, ec_serial); } @@ -391,6 +401,7 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p) if (saved_ints) th->ec->interrupt_flag = saved_ints; if (mutex->ec_serial == ec_serial) mutex_locked(mutex, th, ec_serial); + mutex->wait_waking = false; } RUBY_DEBUG_LOG("%p locked", mutex); @@ -454,6 +465,15 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) struct sync_waiter *cur = 0, *next; + + if (mutex->wait_waking) { + uint32_t saved = mutex->saved_running_time_us; + if (th->running_time_us < saved) { + th->running_time_us = saved; + } + } + + mutex->saved_running_time_us = 0; mutex->ec_serial = 0; thread_mutex_remove(th, mutex); @@ -469,6 +489,7 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) case THREAD_RUNNABLE: /* from someone else calling Thread#run */ case THREAD_STOPPED_FOREVER: /* likely (rb_mutex_lock) */ RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(cur->th)); + mutex->wait_waking = true; rb_threadptr_interrupt(cur->th); return NULL; case THREAD_STOPPED: /* probably impossible */ @@ -480,7 +501,6 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) } } } - // We did not find any threads to wake up, so we can just return with no error: return NULL; } From 2605d4e5fce35a0018957634f98acffc8c5370e6 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 22 Jan 2026 09:58:25 -0800 Subject: [PATCH 09/77] ZJIT: Extract VRegId from a usize We would like to do type matching on the VRegId. Extracting the VRegID from a usize makes the code a bit easier to understand and refactor. MemBase uses a VReg, and there is also a VReg in Opnd. 
We should be sharing types between these two, so this is a step in the direction of sharing a type --- zjit/src/backend/arm64/mod.rs | 6 +- zjit/src/backend/lir.rs | 113 +++++++++++++++++++++++---------- zjit/src/backend/tests.rs | 1 - zjit/src/backend/x86_64/mod.rs | 6 +- 4 files changed, 86 insertions(+), 40 deletions(-) diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index d06e84536f6345..6ed855ddf9c688 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -390,7 +390,7 @@ impl Assembler { } let mut asm_local = Assembler::new_with_asm(&self); - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = take(&mut self.live_ranges); let mut iterator = self.instruction_iterator(); let asm = &mut asm_local; @@ -755,7 +755,7 @@ impl Assembler { asm_local.accept_scratch_reg = true; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); @@ -1691,7 +1691,7 @@ impl Assembler { /// /// If a, b, and c are all registers. 
fn merge_three_reg_mov( - live_ranges: &[LiveRange], + live_ranges: &LiveRanges, iterator: &mut InsnIter, asm: &mut Assembler, left: &Opnd, diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index f2f7bc61659d04..f0fcece8a1a7d3 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -21,18 +21,33 @@ use crate::state::rb_zjit_record_exit_stack; #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] pub struct BlockId(pub usize); +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)] +pub struct VRegId(pub usize); + impl From<BlockId> for usize { fn from(val: BlockId) -> Self { val.0 } } +impl From<VRegId> for usize { + fn from(val: VRegId) -> Self { + val.0 + } +} + impl std::fmt::Display for BlockId { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "l{}", self.0) } } +impl std::fmt::Display for VRegId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "v{}", self.0) + } +} + /// Dummy HIR block ID used when creating test or invalid LIR blocks const DUMMY_HIR_BLOCK_ID: usize = usize::MAX; /// Dummy RPO index used when creating test or invalid LIR blocks @@ -131,7 +146,7 @@ pub enum MemBase /// Register: Every Opnd::Mem should have MemBase::Reg as of emit. Reg(u8), /// Virtual register: Lowered to MemBase::Reg or MemBase::Stack in alloc_regs. - VReg(usize), + VReg(VRegId), /// Stack slot: Lowered to MemBase::Reg in scratch_split. Stack { stack_idx: usize, num_bits: u8 }, } @@ -158,7 +173,7 @@ impl fmt::Display for Mem { write!(f, "[")?; match self.base { MemBase::Reg(reg_no) => write!(f, "{}", mem_base_reg(reg_no))?, - MemBase::VReg(idx) => write!(f, "v{idx}")?, + MemBase::VReg(idx) => write!(f, "{idx}")?, MemBase::Stack { stack_idx, num_bits } if num_bits == 64 => write!(f, "Stack[{stack_idx}]")?, MemBase::Stack { stack_idx, num_bits } => write!(f, "Stack{num_bits}[{stack_idx}]")?, } @@ -196,7 +211,7 @@ pub enum Opnd Value(VALUE), /// Virtual register.
Lowered to Reg or Mem in Assembler::alloc_regs(). - VReg{ idx: usize, num_bits: u8 }, + VReg{ idx: VRegId, num_bits: u8 }, // Low-level operands, for lowering Imm(i64), // Raw signed immediate @@ -212,8 +227,8 @@ impl fmt::Display for Opnd { None => write!(f, "None"), Value(VALUE(value)) if *value < 10 => write!(f, "Value({value:x})"), Value(VALUE(value)) => write!(f, "Value(0x{value:x})"), - VReg { idx, num_bits } if *num_bits == 64 => write!(f, "v{idx}"), - VReg { idx, num_bits } => write!(f, "VReg{num_bits}(v{idx})"), + VReg { idx, num_bits } if *num_bits == 64 => write!(f, "{idx}"), + VReg { idx, num_bits } => write!(f, "VReg{num_bits}({idx})"), Imm(value) if value.abs() < 10 => write!(f, "Imm({value:x})"), Imm(value) => write!(f, "Imm(0x{value:x})"), UImm(value) if *value < 10 => write!(f, "{value:x}"), @@ -282,7 +297,7 @@ impl Opnd } /// Unwrap the index of a VReg - pub fn vreg_idx(&self) -> usize { + pub fn vreg_idx(&self) -> VRegId { match self { Opnd::VReg { idx, .. } => *idx, _ => unreachable!("trying to unwrap {self:?} into VReg"), @@ -321,10 +336,10 @@ impl Opnd pub fn map_index(self, indices: &[usize]) -> Opnd { match self { Opnd::VReg { idx, num_bits } => { - Opnd::VReg { idx: indices[idx], num_bits } + Opnd::VReg { idx: VRegId(indices[idx.0]), num_bits } } Opnd::Mem(Mem { base: MemBase::VReg(idx), disp, num_bits }) => { - Opnd::Mem(Mem { base: MemBase::VReg(indices[idx]), disp, num_bits }) + Opnd::Mem(Mem { base: MemBase::VReg(VRegId(indices[idx.0])), disp, num_bits }) }, _ => self } @@ -1355,12 +1370,44 @@ impl LiveRange { } } +/// Type-safe wrapper around `Vec<LiveRange>` that can be indexed by VRegId +#[derive(Clone, Debug, Default)] +pub struct LiveRanges(Vec<LiveRange>); + +impl LiveRanges { + pub fn new(size: usize) -> Self { + Self(vec![LiveRange { start: None, end: None }; size]) + } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn get(&self, vreg_id: VRegId) -> Option<&LiveRange> { + self.0.get(vreg_id.0) + } +} + +impl std::ops::Index<VRegId> for
LiveRanges { + type Output = LiveRange; + + fn index(&self, idx: VRegId) -> &Self::Output { + &self.0[idx.0] + } +} + +impl std::ops::IndexMut<VRegId> for LiveRanges { + fn index_mut(&mut self, idx: VRegId) -> &mut Self::Output { + &mut self.0[idx.0] + } +} + /// StackState manages which stack slots are used by which VReg pub struct StackState { /// The maximum number of spilled VRegs at a time stack_size: usize, /// Map from index at the C stack for spilled VRegs to Some(vreg_idx) if allocated - stack_slots: Vec<Option<usize>>, + stack_slots: Vec<Option<VRegId>>, /// Copy of Assembler::stack_base_idx. Used for calculating stack slot offsets. stack_base_idx: usize, } @@ -1376,7 +1423,7 @@ impl StackState { } /// Allocate a stack slot for a given vreg_idx - fn alloc_stack(&mut self, vreg_idx: usize) -> Opnd { + fn alloc_stack(&mut self, vreg_idx: VRegId) -> Opnd { for stack_idx in 0..self.stack_size { if self.stack_slots[stack_idx].is_none() { self.stack_slots[stack_idx] = Some(vreg_idx); @@ -1437,7 +1484,7 @@ struct RegisterPool { /// Some(vreg_idx) if the register at the index in `pool` is used by the VReg. /// None if the register is not in use. - pool: Vec<Option<usize>>, + pool: Vec<Option<VRegId>>, /// The number of live registers. /// Provides a quick way to query `pool.filter(|r| r.is_some()).count()` @@ -1461,7 +1508,7 @@ impl RegisterPool { /// Mutate the pool to indicate that the register at the index /// has been allocated and is live.
- fn alloc_opnd(&mut self, vreg_idx: usize) -> Opnd { + fn alloc_opnd(&mut self, vreg_idx: VRegId) -> Opnd { for (reg_idx, reg) in self.regs.iter().enumerate() { if self.pool[reg_idx].is_none() { self.pool[reg_idx] = Some(vreg_idx); @@ -1473,7 +1520,7 @@ impl RegisterPool { } /// Allocate a specific register - fn take_reg(&mut self, reg: &Reg, vreg_idx: usize) -> Opnd { + fn take_reg(&mut self, reg: &Reg, vreg_idx: VRegId) -> Opnd { let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no) .unwrap_or_else(|| panic!("Unable to find register: {}", reg.reg_no)); assert_eq!(self.pool[reg_idx], None, "register already allocated for VReg({:?})", self.pool[reg_idx]); @@ -1499,7 +1546,7 @@ impl RegisterPool { } /// Return a list of (Reg, vreg_idx) tuples for all live registers - fn live_regs(&self) -> Vec<(Reg, usize)> { + fn live_regs(&self) -> Vec<(Reg, VRegId)> { let mut live_regs = Vec::with_capacity(self.live_regs); for (reg_idx, &reg) in self.regs.iter().enumerate() { if let Some(vreg_idx) = self.pool[reg_idx] { @@ -1510,7 +1557,7 @@ impl RegisterPool { } /// Return vreg_idx if a given register is already in use - fn vreg_for(&self, reg: &Reg) -> Option<usize> { + fn vreg_for(&self, reg: &Reg) -> Option<VRegId> { let reg_idx = self.regs.iter().position(|elem| elem.reg_no == reg.reg_no).unwrap(); self.pool[reg_idx] } @@ -1536,7 +1583,7 @@ pub struct Assembler { current_block_id: BlockId, /// Live range for each VReg indexed by its `idx`` - pub(super) live_ranges: Vec<LiveRange>, + pub(super) live_ranges: LiveRanges, /// Names of labels pub(super) label_names: Vec, @@ -1568,7 +1615,7 @@ impl Assembler leaf_ccall_stack_size: None, basic_blocks: Vec::default(), current_block_id: BlockId(0), - live_ranges: Vec::default(), + live_ranges: LiveRanges::default(), idx: 0, } } @@ -1602,7 +1649,7 @@ impl Assembler // Initialize live_ranges to match the old assembler's size // This allows reusing VRegs from the old assembler - asm.live_ranges.resize(old_asm.live_ranges.len(), LiveRange {
start: None, end: None }); + asm.live_ranges = LiveRanges::new(old_asm.live_ranges.len()); asm } @@ -1780,8 +1827,8 @@ impl Assembler /// Build an Opnd::VReg and initialize its LiveRange pub(super) fn new_vreg(&mut self, num_bits: u8) -> Opnd { - let vreg = Opnd::VReg { idx: self.live_ranges.len(), num_bits }; - self.live_ranges.push(LiveRange { start: None, end: None }); + let vreg = Opnd::VReg { idx: VRegId(self.live_ranges.len()), num_bits }; + self.live_ranges.0.push(LiveRange { start: None, end: None }); vreg } @@ -1794,7 +1841,7 @@ impl Assembler // Initialize the live range of the output VReg to insn_idx..=insn_idx if let Some(Opnd::VReg { idx, .. }) = insn.out_opnd() { - assert!(*idx < self.live_ranges.len()); + assert!(idx.0 < self.live_ranges.len()); assert_eq!(self.live_ranges[*idx], LiveRange { start: None, end: None }); self.live_ranges[*idx] = LiveRange { start: Some(insn_idx), end: Some(insn_idx) }; } @@ -1805,7 +1852,7 @@ impl Assembler match *opnd { Opnd::VReg { idx, .. } | Opnd::Mem(Mem { base: MemBase::VReg(idx), .. }) => { - assert!(idx < self.live_ranges.len()); + assert!(idx.0 < self.live_ranges.len()); assert_ne!(self.live_ranges[idx].end, None); self.live_ranges[idx].end = Some(self.live_ranges[idx].end().max(insn_idx)); } @@ -1894,7 +1941,7 @@ impl Assembler let mut vreg_opnd: Vec> = vec![None; self.live_ranges.len()]; // List of registers saved before a C call, paired with the VReg index. - let mut saved_regs: Vec<(Reg, usize)> = vec![]; + let mut saved_regs: Vec<(Reg, VRegId)> = vec![]; // Remember the indexes of Insn::FrameSetup to update the stack size later let mut frame_setup_idxs: Vec<(BlockId, usize)> = vec![]; @@ -1906,7 +1953,7 @@ impl Assembler let asm = &mut asm_local; - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = take(&mut self.live_ranges); while let Some((index, mut insn)) = iterator.next(asm) { // Remember the index of FrameSetup to bump slot_count when we know the max number of spilled VRegs. 
@@ -1924,7 +1971,7 @@ impl Assembler let new_opnd = pool.alloc_opnd(vreg_idx); asm.mov(new_opnd, C_RET_OPND); pool.dealloc_opnd(&Opnd::Reg(C_RET_REG)); - vreg_opnd[vreg_idx] = Some(new_opnd); + vreg_opnd[vreg_idx.0] = Some(new_opnd); } true @@ -1943,7 +1990,7 @@ impl Assembler // uses this operand. If it is, we can return the allocated // register to the pool. if live_ranges[idx].end() == index { - if let Some(opnd) = vreg_opnd[idx] { + if let Some(opnd) = vreg_opnd[idx.0] { pool.dealloc_opnd(&opnd); } else { unreachable!("no register allocated for insn {:?}", insn); @@ -1987,7 +2034,7 @@ impl Assembler }; if let Some(vreg_idx) = vreg_idx { if live_ranges[vreg_idx].end() == index { - debug!("Allocating a register for VReg({}) at instruction index {} even though it does not live past this index", vreg_idx, index); + debug!("Allocating a register for {vreg_idx} at instruction index {index} even though it does not live past this index"); } // This is going to be the output operand that we will set on the // instruction. CCall and LiveReg need to use a specific register. @@ -2012,7 +2059,7 @@ impl Assembler if let Some(Opnd::VReg{ idx, .. }) = opnd_iter.next() { if live_ranges[*idx].end() == index { - if let Some(Opnd::Reg(reg)) = vreg_opnd[*idx] { + if let Some(Opnd::Reg(reg)) = vreg_opnd[idx.0] { out_reg = Some(pool.take_reg(&reg, vreg_idx)); } } @@ -2031,7 +2078,7 @@ impl Assembler // extends beyond the index of the instruction.
let out = insn.out_opnd_mut().unwrap(); let out_opnd = out_opnd.with_num_bits(out_num_bits); - vreg_opnd[out.vreg_idx()] = Some(out_opnd); + vreg_opnd[out.vreg_idx().0] = Some(out_opnd); *out = out_opnd; } @@ -2040,10 +2087,10 @@ impl Assembler while let Some(opnd) = opnd_iter.next() { match *opnd { Opnd::VReg { idx, num_bits } => { - *opnd = vreg_opnd[idx].unwrap().with_num_bits(num_bits); + *opnd = vreg_opnd[idx.0].unwrap().with_num_bits(num_bits); }, Opnd::Mem(Mem { base: MemBase::VReg(idx), disp, num_bits }) => { - *opnd = match vreg_opnd[idx].unwrap() { + *opnd = match vreg_opnd[idx.0].unwrap() { Opnd::Reg(reg) => Opnd::Mem(Mem { base: MemBase::Reg(reg.reg_no), disp, num_bits }), // If the base is spilled, lower it to MemBase::Stack, which scratch_split will lower to MemBase::Reg. Opnd::Mem(mem) => Opnd::Mem(Mem { base: pool.stack_state.mem_to_stack_membase(mem), disp, num_bits }), @@ -2058,7 +2105,7 @@ impl Assembler // register if let Some(idx) = vreg_idx { if live_ranges[idx].end() == index { - if let Some(opnd) = vreg_opnd[idx] { + if let Some(opnd) = vreg_opnd[idx.0] { pool.dealloc_opnd(&opnd); } else { unreachable!("no register allocated for insn {:?}", insn); @@ -2849,7 +2896,7 @@ impl Assembler { asm_local.accept_scratch_reg = self.accept_scratch_reg; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); diff --git a/zjit/src/backend/tests.rs b/zjit/src/backend/tests.rs index 701029b8ec0c2c..32b6fe9b5ef31e 100644 --- a/zjit/src/backend/tests.rs +++ b/zjit/src/backend/tests.rs @@ -3,7 +3,6 @@ use crate::backend::lir::*; use crate::cruby::*; use crate::codegen::c_callable; use crate::options::rb_zjit_prepare_options; -use crate::hir; #[test] fn test_add() { diff 
--git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index a4cf8dfcc5e892..b045e0f3a3d04b 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -140,7 +140,7 @@ impl Assembler { { let mut asm_local = Assembler::new_with_asm(&self); let asm = &mut asm_local; - let live_ranges: Vec = take(&mut self.live_ranges); + let live_ranges = take(&mut self.live_ranges); let mut iterator = self.instruction_iterator(); while let Some((index, mut insn)) = iterator.next(asm) { @@ -166,7 +166,7 @@ impl Assembler { // When we split an operand, we can create a new VReg not in `live_ranges`. // So when we see a VReg with out-of-range index, it's created from splitting // from the loop above and we know it doesn't outlive the current instruction. - let vreg_outlives_insn = |vreg_idx| { + let vreg_outlives_insn = |vreg_idx: VRegId| { live_ranges .get(vreg_idx) .is_some_and(|live_range: &LiveRange| live_range.end() > index) @@ -472,7 +472,7 @@ impl Assembler { asm_local.accept_scratch_reg = true; asm_local.stack_base_idx = self.stack_base_idx; asm_local.label_names = self.label_names.clone(); - asm_local.live_ranges.resize(self.live_ranges.len(), LiveRange { start: None, end: None }); + asm_local.live_ranges = LiveRanges::new(self.live_ranges.len()); // Create one giant block to linearize everything into asm_local.new_block_without_id(); From ed0a5c6f0f09eadafe1ecdfad78ff79571c007b8 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Sun, 25 Jan 2026 23:40:57 +0000 Subject: [PATCH 10/77] [DOC] Fix links in Complex --- complex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/complex.c b/complex.c index 85d724f273b3ea..1ba786a5bb8703 100644 --- a/complex.c +++ b/complex.c @@ -2645,9 +2645,9 @@ float_arg(VALUE self) * First, what's elsewhere: * * - Class \Complex inherits (directly or indirectly) - * from classes {Numeric}[rdoc-ref:Numeric@What-27s+Here] - * and {Object}[rdoc-ref:Object@What-27s+Here]. 
- * - Includes (indirectly) module {Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * from classes {Numeric}[rdoc-ref:Numeric@Whats-Here] + * and {Object}[rdoc-ref:Object@Whats-Here]. + * - Includes (indirectly) module {Comparable}[rdoc-ref:Comparable@Whats-Here]. * * Here, class \Complex has methods for: * From 21f8472e77c3c393f1a40524fc70aad3714026f6 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Sun, 25 Jan 2026 23:19:16 +0000 Subject: [PATCH 11/77] [DOC] Fix links in Array --- array.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/array.c b/array.c index b4718238763bab..4496dde2626500 100644 --- a/array.c +++ b/array.c @@ -8423,12 +8423,12 @@ rb_ary_deconstruct(VALUE ary) * * [1, 'one', :one, [2, 'two', :two]] * - * - A {%w or %W string-array Literal}[rdoc-ref:syntax/literals.rdoc@25w+and+-25W-3A+String-Array+Literals]: + * - A {%w or %W string-array Literal}[rdoc-ref:syntax/literals.rdoc@w-and-w-String-Array-Literals]: * * %w[foo bar baz] # => ["foo", "bar", "baz"] * %w[1 % *] # => ["1", "%", "*"] * - * - A {%i or %I symbol-array Literal}[rdoc-ref:syntax/literals.rdoc@25i+and+-25I-3A+Symbol-Array+Literals]: + * - A {%i or %I symbol-array Literal}[rdoc-ref:syntax/literals.rdoc@i+and-I-Symbol-Array+Literals]: * * %i[foo bar baz] # => [:foo, :bar, :baz] * %i[1 % *] # => [:"1", :%, :*] @@ -8690,8 +8690,8 @@ rb_ary_deconstruct(VALUE ary) * * First, what's elsewhere. Class \Array: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats-Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats-Here], * which provides dozens of additional methods. 
* * Here, class \Array provides methods that are useful for: From b7102933ee66639a3c92928f4166356b92786744 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Sun, 25 Jan 2026 17:18:51 +0000 Subject: [PATCH 12/77] [DOC] Doc for Module.nesting --- eval.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/eval.c b/eval.c index deadd5dd6414fb..fd370a43ccd2f2 100644 --- a/eval.c +++ b/eval.c @@ -329,17 +329,24 @@ ruby_exec_node(void *n) /* * call-seq: - * Module.nesting -> array - * - * Returns the list of +Modules+ nested at the point of call. + * Module.nesting -> array + * + * Returns nested module as an array of Module objects: + * + * module M0 + * def self.speak = Module.nesting + * module M1 + * def self.speak = Module.nesting + * module M2 + * def self.speak = Module.nesting + * end + * end + * end + * M0.speak # => [M0] + * M0.speak.first.class # => Module + * M0::M1.speak # => [M0::M1, M0] + * M0::M1::M2.speak # => [M0::M1::M2, M0::M1, M0] * - * module M1 - * module M2 - * $a = Module.nesting - * end - * end - * $a #=> [M1::M2, M1] - * $a[0].name #=> "M1::M2" */ static VALUE From 6b0dda496e9bdf7c24e7a58514f9fcb5879dad2f Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 26 Jan 2026 17:11:06 -0500 Subject: [PATCH 13/77] ZJIT: Add temporary local definite assignment validator (#15973) Until we get our global register allocator, we need our HIR to be in 100% block-local SSA. Add a validator to enforce that. --- zjit/src/hir.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 2aa74dce8be26c..79004c8737e4ee 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -5125,6 +5125,34 @@ impl Function { Ok(()) } + // Validate that every instruction use is from a block-local definition, which is a temporary + // constraint until we get a global register allocator. 
+ // TODO(tenderworks): Remove this + fn temporary_validate_block_local_definite_assignment(&self) -> Result<(), ValidationError> { + for block in self.rpo() { + let mut assigned = InsnSet::with_capacity(self.insns.len()); + for &param in &self.blocks[block.0].params { + assigned.insert(param); + } + // Check that each instruction's operands are assigned + for &insn_id in &self.blocks[block.0].insns { + let insn_id = self.union_find.borrow().find_const(insn_id); + let mut operands = VecDeque::new(); + let insn = self.find(insn_id); + self.worklist_traverse_single_insn(&insn, &mut operands); + for operand in operands { + if !assigned.get(operand) { + return Err(ValidationError::OperandNotDefined(block, insn_id, operand)); + } + } + if insn.has_output() { + assigned.insert(insn_id); + } + } + } + Ok(()) + } + /// Checks that each instruction('s representative) appears only once in the CFG. fn validate_insn_uniqueness(&self) -> Result<(), ValidationError> { let mut seen = InsnSet::with_capacity(self.insns.len()); @@ -5425,6 +5453,7 @@ impl Function { pub fn validate(&self) -> Result<(), ValidationError> { self.validate_block_terminators_and_jumps()?; self.validate_definite_assignment()?; + self.temporary_validate_block_local_definite_assignment()?; self.validate_insn_uniqueness()?; self.validate_types()?; Ok(()) @@ -7577,6 +7606,16 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); } + #[test] + fn not_defined_within_bb_block_local() { + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + // Create an instruction without making it belong to anything.
+ let dangling = function.new_insn(Insn::Const{val: Const::CBool(true)}); + let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: dangling, state: InsnId(0usize) }); + assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, dangling)); + } + #[test] fn using_non_output_insn() { let mut function = Function::new(std::ptr::null()); @@ -7588,6 +7627,17 @@ mod validation_tests { assert_matches_err(function.validate_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); } + #[test] + fn using_non_output_insn_block_local() { + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + let const_ = function.push_insn(function.entry_block, Insn::Const{val: Const::CBool(true)}); + // Ret is a non-output instruction. + let ret = function.push_insn(function.entry_block, Insn::Return { val: const_ }); + let val = function.push_insn(function.entry_block, Insn::ArrayDup { val: ret, state: InsnId(0usize) }); + assert_matches_err(function.temporary_validate_block_local_definite_assignment(), ValidationError::OperandNotDefined(entry, val, ret)); + } + #[test] fn not_dominated_by_diamond() { // This tests that one branch is missing a definition which fails. From 491e38902c2f5deef5dbb249cdf609ae2bb52f1e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 26 Jan 2026 17:10:35 -0500 Subject: [PATCH 14/77] [DOC] Fix links to What's Here The RDoc link format has changed so these are all broken links. 
--- dir.rb | 6 +++--- doc/float.rb | 6 +++--- doc/string.rb | 4 ++-- file.c | 2 +- hash.c | 8 ++++---- io.c | 4 ++-- numeric.c | 10 +++++----- object.c | 4 ++-- range.c | 4 ++-- set.c | 4 ++-- string.c | 4 ++-- struct.c | 4 ++-- timev.rb | 4 ++-- 13 files changed, 32 insertions(+), 32 deletions(-) diff --git a/dir.rb b/dir.rb index eb1a408ee3ac5b..9b83f688227d6c 100644 --- a/dir.rb +++ b/dir.rb @@ -31,7 +31,7 @@ # A \Dir object is in some ways array-like: # # - It has instance methods #children, #each, and #each_child. -# - It includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here]. +# - It includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here]. # # == \Dir As Stream-Like # @@ -85,8 +85,8 @@ # # First, what's elsewhere. Class \Dir: # -# - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], +# - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], # which provides dozens of additional methods. # # Here, class \Dir provides methods that are useful for: diff --git a/doc/float.rb b/doc/float.rb index 01668bfc6dacf1..f9068dfb1b82cc 100644 --- a/doc/float.rb +++ b/doc/float.rb @@ -72,9 +72,9 @@ # First, what's elsewhere. Class \Float: # # - Inherits from -# {class Numeric}[rdoc-ref:Numeric@What-27s+Here] -# and {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. +# {class Numeric}[rdoc-ref:Numeric@Whats+Here] +# and {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. # # Here, class \Float provides methods for: # diff --git a/doc/string.rb b/doc/string.rb index 304ab60c298967..e2dfb37c9fa5eb 100644 --- a/doc/string.rb +++ b/doc/string.rb @@ -163,8 +163,8 @@ # # First, what's elsewhere. Class +String+: # -# - Inherits from the {Object class}[rdoc-ref:Object@What-27s+Here]. 
-# - Includes the {Comparable module}[rdoc-ref:Comparable@What-27s+Here]. +# - Inherits from the {Object class}[rdoc-ref:Object@Whats+Here]. +# - Includes the {Comparable module}[rdoc-ref:Comparable@Whats+Here]. # # Here, class +String+ provides methods that are useful for: # diff --git a/file.c b/file.c index 8f4e9d86c8241c..706b60c9997919 100644 --- a/file.c +++ b/file.c @@ -7507,7 +7507,7 @@ const char ruby_null_device[] = * * First, what's elsewhere. Class \File: * - * - Inherits from {class IO}[rdoc-ref:IO@What-27s+Here], + * - Inherits from {class IO}[rdoc-ref:IO@Whats+Here], * in particular, methods for creating, reading, and writing files * - Includes module FileTest, * which provides dozens of additional methods. diff --git a/hash.c b/hash.c index 07eeb779e9f197..e116eb8ab6f026 100644 --- a/hash.c +++ b/hash.c @@ -7216,8 +7216,8 @@ static const rb_data_type_t env_data_type = { * * First, what's elsewhere. Class +Hash+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class +Hash+ provides methods that are useful for: @@ -7528,8 +7528,8 @@ Init_Hash(void) * * First, what's elsewhere. Class +ENV+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Extends {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Extends {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * * Here, class +ENV+ provides methods that are useful for: * diff --git a/io.c b/io.c index 25c66550f5c382..8563fa6536c02f 100644 --- a/io.c +++ b/io.c @@ -15469,8 +15469,8 @@ set_LAST_READ_LINE(VALUE val, ID _x, VALUE *_y) * * First, what's elsewhere. 
Class \IO: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class \IO provides methods that are useful for: diff --git a/numeric.c b/numeric.c index e8df2a6aa0568c..36101882943761 100644 --- a/numeric.c +++ b/numeric.c @@ -3680,9 +3680,9 @@ rb_int128_to_numeric(rb_int128_t n) * First, what's elsewhere. Class \Integer: * * - Inherits from - * {class Numeric}[rdoc-ref:Numeric@What-27s+Here] - * and {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * {class Numeric}[rdoc-ref:Numeric@Whats+Here] + * and {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class \Integer provides methods for: * @@ -6365,8 +6365,8 @@ int_s_try_convert(VALUE self, VALUE num) * * First, what's elsewhere. Class \Numeric: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class \Numeric provides methods for: * diff --git a/object.c b/object.c index 75186a30c66868..07eb1d8e975251 100644 --- a/object.c +++ b/object.c @@ -4357,8 +4357,8 @@ rb_f_loop_size(VALUE self, VALUE args, VALUE eobj) * * First, what's elsewhere. Class \Object: * - * - Inherits from {class BasicObject}[rdoc-ref:BasicObject@What-27s+Here]. - * - Includes {module Kernel}[rdoc-ref:Kernel@What-27s+Here]. + * - Inherits from {class BasicObject}[rdoc-ref:BasicObject@Whats+Here]. + * - Includes {module Kernel}[rdoc-ref:Kernel@Whats+Here]. 
* * Here, class \Object provides methods for: * diff --git a/range.c b/range.c index fd08a81de7b8b1..36afdfa7619005 100644 --- a/range.c +++ b/range.c @@ -2768,8 +2768,8 @@ range_overlap(VALUE range, VALUE other) * * First, what's elsewhere. Class \Range: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * Here, class \Range provides methods that are useful for: diff --git a/set.c b/set.c index 4d8178ffc080de..484439a40a6ab7 100644 --- a/set.c +++ b/set.c @@ -2051,8 +2051,8 @@ rb_set_size(VALUE set) * * First, what's elsewhere. \Class \Set: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * In particular, class \Set does not have many methods of its own diff --git a/string.c b/string.c index 464eab21463ff3..a36eb6e9f381c0 100644 --- a/string.c +++ b/string.c @@ -12174,8 +12174,8 @@ rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str) * * First, what's elsewhere. Class +Symbol+: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. * * Here, class +Symbol+ provides methods that are useful for: * diff --git a/struct.c b/struct.c index 65410ebdf302b8..61aff40a32c081 100644 --- a/struct.c +++ b/struct.c @@ -2134,8 +2134,8 @@ rb_data_inspect(VALUE s) * * First, what's elsewhere. 
Class \Struct: * - * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. - * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@Whats+Here], * which provides dozens of additional methods. * * See also Data, which is a somewhat similar, but stricter concept for defining immutable diff --git a/timev.rb b/timev.rb index cf8a88e64eff0c..005c3d481a0ebf 100644 --- a/timev.rb +++ b/timev.rb @@ -170,8 +170,8 @@ # # First, what's elsewhere. Class +Time+: # -# - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Comparable}[rdoc-ref:Comparable@What-27s+Here]. +# - Inherits from {class Object}[rdoc-ref:Object@Whats+Here]. +# - Includes {module Comparable}[rdoc-ref:Comparable@Whats+Here]. # # Here, class +Time+ provides methods that are useful for: # From 969fd30cb17a45943011768f40f281f5f270acc6 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 26 Jan 2026 21:00:32 -0500 Subject: [PATCH 15/77] [ruby/net-http] [DOC] Fix links The RDoc link format has changed so these are all broken links. https://github.com/ruby/net-http/commit/97fe6085c3 --- lib/net/http.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/net/http.rb b/lib/net/http.rb index 98d6793aee033d..98639978a2fbf6 100644 --- a/lib/net/http.rb +++ b/lib/net/http.rb @@ -460,7 +460,7 @@ class HTTPHeaderSyntaxError < StandardError; end # # First, what's elsewhere. Class Net::HTTP: # - # - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. + # - Inherits from {class Object}[rdoc-ref:Object#class-object-whats-here]. # # This is a categorized summary of methods and attributes. 
# @@ -1304,7 +1304,7 @@ def response_body_encoding=(value) # Sets whether to determine the proxy from environment variable # 'ENV['http_proxy']'; - # see {Proxy Using ENV['http_proxy']}[rdoc-ref:Net::HTTP@Proxy+Using+-27ENV-5B-27http_proxy-27-5D-27]. + # see {Proxy Using ENV['http_proxy']}[rdoc-ref:Net::HTTP@Proxy+Using+ENVHTTPProxy]. attr_writer :proxy_from_env # Sets the proxy address; From 09872ea950b932947fd55e02ffad7200845672d0 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Sat, 20 Dec 2025 03:11:07 -0800 Subject: [PATCH 16/77] [ruby/resolv] add missing typeclasses to doc https://github.com/ruby/resolv/commit/78df896829 --- lib/resolv.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/resolv.rb b/lib/resolv.rb index fa7d4e2e4753b3..b6ff3485182e5d 100644 --- a/lib/resolv.rb +++ b/lib/resolv.rb @@ -487,13 +487,18 @@ def each_name(address) # * Resolv::DNS::Resource::IN::A # * Resolv::DNS::Resource::IN::AAAA # * Resolv::DNS::Resource::IN::ANY + # * Resolv::DNS::Resource::IN::CAA # * Resolv::DNS::Resource::IN::CNAME # * Resolv::DNS::Resource::IN::HINFO + # * Resolv::DNS::Resource::IN::HTTPS + # * Resolv::DNS::Resource::IN::LOC # * Resolv::DNS::Resource::IN::MINFO # * Resolv::DNS::Resource::IN::MX # * Resolv::DNS::Resource::IN::NS # * Resolv::DNS::Resource::IN::PTR # * Resolv::DNS::Resource::IN::SOA + # * Resolv::DNS::Resource::IN::SRV + # * Resolv::DNS::Resource::IN::SVCB # * Resolv::DNS::Resource::IN::TXT # * Resolv::DNS::Resource::IN::WKS # From c55d214ed383b6583ee68cffee69a63a2c92fe32 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Sat, 20 Dec 2025 03:23:56 -0800 Subject: [PATCH 17/77] [ruby/resolv] add getresources test for every resource typeclass https://github.com/ruby/resolv/commit/4bad8bccfc --- test/resolv/test_dns.rb | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/resolv/test_dns.rb b/test/resolv/test_dns.rb index 7a01909eeb9a4b..5cf4087d999481 100644 --- a/test/resolv/test_dns.rb +++ 
b/test/resolv/test_dns.rb @@ -942,4 +942,30 @@ def test_tcp_connection_closed_with_partial_message_body client_thread.join end end + + def test_every_resource_typeclass + Resolv::DNS.open do |dns| + [ + Resolv::DNS::Resource::IN::A, + Resolv::DNS::Resource::IN::AAAA, + Resolv::DNS::Resource::IN::ANY, + Resolv::DNS::Resource::IN::CAA, + Resolv::DNS::Resource::IN::CNAME, + Resolv::DNS::Resource::IN::HINFO, + Resolv::DNS::Resource::IN::HTTPS, + Resolv::DNS::Resource::IN::LOC, + Resolv::DNS::Resource::IN::MINFO, + Resolv::DNS::Resource::IN::MX, + Resolv::DNS::Resource::IN::NS, + Resolv::DNS::Resource::IN::PTR, + Resolv::DNS::Resource::IN::SOA, + Resolv::DNS::Resource::IN::SRV, + Resolv::DNS::Resource::IN::SVCB, + Resolv::DNS::Resource::IN::TXT, + Resolv::DNS::Resource::IN::WKS + ].each do |typeclass| + assert_instance_of(Array, dns.getresources("ruby-lang.org", typeclass)) + end + end + end end From ade85c45da666a174b618170cc9e680351a18a1b Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Fri, 16 Jan 2026 01:10:16 -0800 Subject: [PATCH 18/77] [ruby/resolv] remove test for every class https://github.com/ruby/resolv/commit/96e483d55b --- test/resolv/test_dns.rb | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/test/resolv/test_dns.rb b/test/resolv/test_dns.rb index 5cf4087d999481..7a01909eeb9a4b 100644 --- a/test/resolv/test_dns.rb +++ b/test/resolv/test_dns.rb @@ -942,30 +942,4 @@ def test_tcp_connection_closed_with_partial_message_body client_thread.join end end - - def test_every_resource_typeclass - Resolv::DNS.open do |dns| - [ - Resolv::DNS::Resource::IN::A, - Resolv::DNS::Resource::IN::AAAA, - Resolv::DNS::Resource::IN::ANY, - Resolv::DNS::Resource::IN::CAA, - Resolv::DNS::Resource::IN::CNAME, - Resolv::DNS::Resource::IN::HINFO, - Resolv::DNS::Resource::IN::HTTPS, - Resolv::DNS::Resource::IN::LOC, - Resolv::DNS::Resource::IN::MINFO, - Resolv::DNS::Resource::IN::MX, - Resolv::DNS::Resource::IN::NS, - Resolv::DNS::Resource::IN::PTR, - 
Resolv::DNS::Resource::IN::SOA, - Resolv::DNS::Resource::IN::SRV, - Resolv::DNS::Resource::IN::SVCB, - Resolv::DNS::Resource::IN::TXT, - Resolv::DNS::Resource::IN::WKS - ].each do |typeclass| - assert_instance_of(Array, dns.getresources("ruby-lang.org", typeclass)) - end - end - end end From 83713db7f1d0c927984350954ec6b8e72662482a Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 27 Jan 2026 11:18:31 +0100 Subject: [PATCH 19/77] gc.c: Fix `rb_gc_obj_needs_cleanup_p` - T_BIGNUM may have fields via `#object_id`. - The T_DATA logic was inverted. If `dfree` is unset we don't need cleanup. --- gc.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/gc.c b/gc.c index f1c7f834d0f70c..935a9f5d4bdaaa 100644 --- a/gc.c +++ b/gc.c @@ -1278,15 +1278,6 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) } case T_DATA: - if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { - uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; - if (type & TYPED_DATA_EMBEDDED) { - RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; - return (dfree == RUBY_NEVER_FREE || dfree == RUBY_TYPED_DEFAULT_FREE); - } - } - return true; - case T_OBJECT: case T_STRING: case T_ARRAY: @@ -1298,7 +1289,13 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) case T_COMPLEX: break; - default: + case T_FILE: + case T_SYMBOL: + case T_CLASS: + case T_ICLASS: + case T_MODULE: + case T_REGEXP: + case T_MATCH: return true; } @@ -1310,6 +1307,18 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) if (flags & ROBJECT_HEAP) return true; return false; + case T_DATA: + if (flags & RUBY_TYPED_FL_IS_TYPED_DATA) { + uintptr_t type = (uintptr_t)RTYPEDDATA(obj)->type; + if (type & TYPED_DATA_EMBEDDED) { + RUBY_DATA_FUNC dfree = ((const rb_data_type_t *)(type & TYPED_DATA_PTR_MASK))->function.dfree; + if (dfree == RUBY_NEVER_FREE || dfree == RUBY_TYPED_DEFAULT_FREE) { + return false; + } + } + } + return true; + case T_STRING: if (flags & (RSTRING_NOEMBED | RSTRING_FSTR)) return
true; return rb_shape_has_fields(shape_id); @@ -1324,7 +1333,7 @@ rb_gc_obj_needs_cleanup_p(VALUE obj) case T_BIGNUM: if (!(flags & BIGNUM_EMBED_FLAG)) return true; - return false; + return rb_shape_has_fields(shape_id); case T_STRUCT: if (!(flags & RSTRUCT_EMBED_LEN_MASK)) return true; From fa3e3d1090b2f843735c467aec31dfe7c34581cd Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 27 Jan 2026 23:38:38 +0900 Subject: [PATCH 20/77] Ignore EOL code changes [ci skip] --- .git-blame-ignore-revs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index d98646febf69c1..bc5d291065c335 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -39,3 +39,7 @@ d4e24021d39e1f80f0055b55d91f8d5f22e15084 e90282be7ba1bc8e3119f6e1a2c80356ceb3f80a 26a9e0b4e31f7b5a9cbd755e0a15823a8fa51bae 2f53985da9ee593fe524d408256835667938c7d7 + +# Win32: EOL code of batch files +23f9a0d655c4d405bb2397a147a1523436205486 +b839989fd22fef85e2af19de1bc83aa72a5b22bd From 5d769228c1055524205437860beb1fc2de2d11a0 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:28:31 +0100 Subject: [PATCH 21/77] [ruby/prism] Remove `Prism.lex_ripper` Since `on_sp` is emitted, it doesn't do a whole lot anymore. This leaves one incompatibility for code like `"x#$%"`. Ripper confuses this for bare interpolation with a global, but `$%` is not a valid global name. Still, it emits two string tokens in such a case. It doesn't make sense for prism to work around this bug, so the affected files are added as excludes. Since the only usage of this method makes sense for testing in prism itself, the method is removed instead of deprecated.
https://github.com/ruby/prism/commit/31be379f98 --- lib/prism.rb | 11 --------- lib/prism/lex_ripper.rb | 55 ----------------------------------------- lib/prism/prism.gemspec | 1 - test/prism/bom_test.rb | 3 ++- test/prism/lex_test.rb | 3 ++- 5 files changed, 4 insertions(+), 69 deletions(-) delete mode 100644 lib/prism/lex_ripper.rb diff --git a/lib/prism.rb b/lib/prism.rb index dab3420377214f..781bd4bb0115db 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -20,7 +20,6 @@ module Prism autoload :DSL, "prism/dsl" autoload :InspectVisitor, "prism/inspect_visitor" autoload :LexCompat, "prism/lex_compat" - autoload :LexRipper, "prism/lex_ripper" autoload :MutationCompiler, "prism/mutation_compiler" autoload :Pack, "prism/pack" autoload :Pattern, "prism/pattern" @@ -35,7 +34,6 @@ module Prism # private here. private_constant :LexCompat - private_constant :LexRipper # Raised when requested to parse as the currently running Ruby version but Prism has no support for it. class CurrentVersionError < ArgumentError @@ -68,15 +66,6 @@ def self.lex_compat(source, **options) LexCompat.new(source, **options).result # steep:ignore end - # :call-seq: - # Prism::lex_ripper(source) -> Array - # - # This wraps the result of Ripper.lex. It produces almost exactly the - # same tokens. Raises SyntaxError if the syntax in source is invalid. - def self.lex_ripper(source) - LexRipper.new(source).result # steep:ignore - end - # :call-seq: # Prism::load(source, serialized, freeze) -> ParseResult # diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb deleted file mode 100644 index f069e50ba9aa77..00000000000000 --- a/lib/prism/lex_ripper.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true -# :markup: markdown - -require "ripper" - -module Prism - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. 
- class LexRipper # :nodoc: - attr_reader :source - - def initialize(source) - @source = source - end - - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end - - private - - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) - end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? - end - end - end - end - - private_constant :LexRipper -end diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 283c7b04aa95e6..8c9b140f0e342b 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -77,7 +77,6 @@ Gem::Specification.new do |spec| "lib/prism/ffi.rb", "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", - "lib/prism/lex_ripper.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", "lib/prism/node.rb", diff --git a/test/prism/bom_test.rb b/test/prism/bom_test.rb index 890bc4b36c3ac4..0fa00ae4e844b6 100644 --- a/test/prism/bom_test.rb +++ b/test/prism/bom_test.rb @@ -5,6 +5,7 @@ return if RUBY_ENGINE != "ruby" require_relative "test_helper" +require "ripper" module Prism class BOMTest < TestCase @@ -53,7 +54,7 @@ def test_string def assert_bom(source) bommed = "\xEF\xBB\xBF#{source}" - assert_equal Prism.lex_ripper(bommed), Prism.lex_compat(bommed).value + assert_equal Ripper.lex(bommed), Prism.lex_compat(bommed).value end end end diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index ea4606d2fb6251..9a9f203c280d98 
100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -3,6 +3,7 @@ return if !(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0") require_relative "test_helper" +require "ripper" module Prism class LexTest < TestCase @@ -49,7 +50,7 @@ def test_parse_lex_file if RUBY_VERSION >= "3.3" def test_lex_compare prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Prism.lex_ripper(File.read(__FILE__)) + ripper = Ripper.lex(File.read(__FILE__)) assert_equal(ripper, prism) end end From 52e71ad4b766d57236e66ebbc419a4447fb0b491 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Tue, 27 Jan 2026 12:36:55 -0500 Subject: [PATCH 22/77] Fix test for mutex starvation as well as small fix in thread_sync.c (#15982) Don't reset `th->running_time_us` when unlocking from `mutex_free` or force unlocking during thread destruction. Follow-up to 994257ab06072d. --- test/ruby/test_thread.rb | 12 +++++++----- thread_sync.c | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index 60e3aa772a8642..47a8e94c07c008 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1667,22 +1667,24 @@ def test_mn_threads_sub_millisecond_sleep # [Bug #21840] def test_mutex_owner_doesnt_starve_waiters - assert_ruby_status([], "#{<<~"begin;"}\n#{<<~'end;'}") + assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") begin; + require "tempfile" + temp = Tempfile.new("temp") m = Mutex.new - fib = lambda { |n| + def fib(n) return n if n <= 1 fib(n - 1) + fib(n - 2) - } + end t1_running = false - t1 = Thread.new do + Thread.new do t1_running = true loop do fib(20) m.synchronize do - File.open(__FILE__) { } # reset timeslice due to blocking operation + File.open(temp.path) { } # reset timeslice due to blocking operation end end end diff --git a/thread_sync.c b/thread_sync.c index 2963b6db73b123..8b86c903809c8d 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -466,7 +466,7 @@ 
rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) struct sync_waiter *cur = 0, *next; - if (mutex->wait_waking) { + if (mutex->wait_waking && ec_serial) { uint32_t saved = mutex->saved_running_time_us; if (th->running_time_us < saved) { th->running_time_us = saved; From 6f6ed79a97a2e816a54a1289fd1f33e238ce22c4 Mon Sep 17 00:00:00 2001 From: Nozomi Hijikata <121233810+nozomemein@users.noreply.github.com> Date: Wed, 28 Jan 2026 04:53:24 +0900 Subject: [PATCH 23/77] ZJIT: Compile invokesuperforward (#15958) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes: https://github.com/Shopify/ruby/issues/862 Add dynamic dispatch for `invokesuperforward` instruction as a first step. Specialization like YJIT’s is not implemented yet and will be handled separately. ## Benchmark ### lobsters
before patch ``` Average of last 10, non-warmup iters: 654ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (59.5% of total 15,599,811): Hash#fetch: 3,185,110 (20.4%) Regexp#match?: 708,802 ( 4.5%) Hash#key?: 696,422 ( 4.5%) String#sub!: 489,840 ( 3.1%) Set#include?: 396,625 ( 2.5%) String#<<: 396,279 ( 2.5%) String#start_with?: 379,336 ( 2.4%) Hash#delete: 325,992 ( 2.1%) String.new: 307,248 ( 2.0%) Integer#===: 279,054 ( 1.8%) Symbol#end_with?: 255,539 ( 1.6%) Kernel#is_a?: 246,961 ( 1.6%) Process.clock_gettime: 221,588 ( 1.4%) Integer#>: 219,718 ( 1.4%) String#match?: 218,056 ( 1.4%) Integer#<=: 202,617 ( 1.3%) Time#to_i: 192,214 ( 1.2%) Time#subsec: 189,240 ( 1.2%) String#to_sym: 185,593 ( 1.2%) String#include?: 182,862 ( 1.2%) Top-20 calls to C functions from JIT code (83.7% of total 126,406,213): rb_vm_opt_send_without_block: 37,054,888 (29.3%) rb_vm_send: 10,068,319 ( 8.0%) rb_vm_env_write: 8,529,584 ( 6.7%) rb_hash_aref: 8,014,188 ( 6.3%) rb_zjit_writebarrier_check_immediate: 7,697,828 ( 6.1%) rb_vm_getinstancevariable: 5,954,987 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,759,191 ( 3.8%) rb_obj_is_kind_of: 3,722,656 ( 2.9%) rb_vm_invokesuper: 2,663,433 ( 2.1%) rb_hash_aset: 2,416,121 ( 1.9%) rb_vm_setinstancevariable: 2,355,463 ( 1.9%) rb_vm_opt_getconstant_path: 2,297,784 ( 1.8%) Hash#fetch: 1,779,524 ( 1.4%) fetch: 1,405,586 ( 1.1%) rb_vm_invokeblock: 1,385,970 ( 1.1%) rb_str_buf_append: 1,369,178 ( 1.1%) rb_ec_ary_new_from_values: 1,336,805 ( 1.1%) rb_class_allocate_instance: 1,281,590 ( 1.0%) rb_hash_new_with_size: 899,859 ( 0.7%) rb_vm_sendforward: 798,572 ( 0.6%) Top-2 not optimized method types for send (100.0% of total 4,889,764): iseq: 4,886,942 (99.9%) null: 2,822 ( 0.1%) Top-3 not optimized method types for send_without_block (100.0% of total 525,349): optimized_send: 478,875 (91.2%) null: 42,175 ( 8.0%) optimized_block_call: 4,299 ( 0.8%) Top-3 not optimized method types for super (100.0% of total 2,350,295): cfunc: 
2,239,567 (95.3%) alias: 107,374 ( 4.6%) attrset: 3,354 ( 0.1%) Top-3 instructions with uncategorized fallback reason (100.0% of total 2,216,938): invokeblock: 1,385,970 (62.5%) sendforward: 798,572 (36.0%) opt_send_without_block: 32,396 ( 1.5%) Top-20 send fallback reasons (99.9% of total 51,971,182): send_without_block_polymorphic: 18,639,354 (35.9%) singleton_class_seen: 9,274,307 (17.8%) send_without_block_no_profiles: 7,217,551 (13.9%) send_not_optimized_method_type: 4,889,764 ( 9.4%) send_no_profiles: 2,882,604 ( 5.5%) super_not_optimized_method_type: 2,350,295 ( 4.5%) uncategorized: 2,216,938 ( 4.3%) one_or_more_complex_arg_pass: 1,543,405 ( 3.0%) send_without_block_megamorphic: 723,037 ( 1.4%) send_polymorphic: 544,570 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 483,174 ( 0.9%) send_without_block_not_optimized_need_permission: 390,366 ( 0.8%) too_many_args_for_lir: 312,568 ( 0.6%) super_complex_args_pass: 111,053 ( 0.2%) super_target_complex_args_pass: 104,723 ( 0.2%) super_polymorphic: 87,851 ( 0.2%) argc_param_mismatch: 50,382 ( 0.1%) send_without_block_not_optimized_method_type: 42,175 ( 0.1%) obj_to_string_not_string: 34,861 ( 0.1%) send_without_block_direct_keyword_mismatch: 32,436 ( 0.1%) Top-4 setivar fallback reasons (100.0% of total 2,355,463): not_monomorphic: 2,132,748 (90.5%) not_t_object: 125,163 ( 5.3%) too_complex: 97,531 ( 4.1%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 6,080,097): not_monomorphic: 5,808,527 (95.5%) too_complex: 271,570 ( 4.5%) Top-3 definedivar fallback reasons (100.0% of total 405,302): not_monomorphic: 397,150 (98.0%) too_complex: 5,122 ( 1.3%) not_t_object: 3,030 ( 0.7%) Top-6 invokeblock handler (100.0% of total 1,385,970): monomorphic_iseq: 688,147 (49.7%) polymorphic: 523,864 (37.8%) monomorphic_other: 106,268 ( 7.7%) monomorphic_ifunc: 55,505 ( 4.0%) megamorphic: 6,762 ( 0.5%) no_profiles: 5,424 ( 0.4%) Top-8 popular complex argument-parameter features 
not optimized (100.0% of total 1,850,659): param_forwardable: 685,936 (37.1%) param_block: 641,355 (34.7%) param_rest: 327,046 (17.7%) param_kwrest: 120,210 ( 6.5%) caller_kw_splat: 36,147 ( 2.0%) caller_splat: 34,029 ( 1.8%) caller_blockarg: 5,826 ( 0.3%) caller_kwarg: 110 ( 0.0%) Top-1 compile error reasons (100.0% of total 191,769): exception_handler: 191,769 (100.0%) Top-6 unhandled YARV insns (100.0% of total 89,278): invokesuperforward: 81,667 (91.5%) getconstant: 3,318 ( 3.7%) setblockparam: 2,837 ( 3.2%) checkmatch: 929 ( 1.0%) expandarray: 360 ( 0.4%) once: 167 ( 0.2%) Top-3 unhandled HIR insns (100.0% of total 236,976): throw: 198,481 (83.8%) invokebuiltin: 35,774 (15.1%) array_max: 2,721 ( 1.1%) Top-20 side exit reasons (100.0% of total 15,409,202): guard_type_failure: 6,871,609 (44.6%) guard_shape_failure: 6,854,409 (44.5%) block_param_proxy_not_iseq_or_ifunc: 1,008,346 ( 6.5%) unhandled_hir_insn: 236,976 ( 1.5%) compile_error: 191,769 ( 1.2%) unhandled_yarv_insn: 89,278 ( 0.6%) fixnum_mult_overflow: 50,739 ( 0.3%) block_param_proxy_modified: 28,119 ( 0.2%) patchpoint_stable_constant_names: 19,872 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,787 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) expandarray_failure: 4,532 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) obj_to_string_fallback: 275 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 111 ( 0.0%) send_count: 152,221,918 dynamic_send_count: 51,971,182 (34.1%) optimized_send_count: 100,250,736 (65.9%) dynamic_setivar_count: 2,355,463 ( 1.5%) dynamic_getivar_count: 6,080,097 ( 4.0%) dynamic_definedivar_count: 405,302 ( 0.3%) iseq_optimized_send_count: 40,162,692 (26.4%) inline_cfunc_optimized_send_count: 40,296,415 (26.5%) inline_iseq_optimized_send_count: 3,344,046 ( 2.2%) non_variadic_cfunc_optimized_send_count: 8,915,909 ( 5.9%) 
variadic_cfunc_optimized_send_count: 7,531,674 ( 4.9%) compiled_iseq_count: 5,554 failed_iseq_count: 0 compile_time: 1,779ms profile_time: 13ms gc_time: 19ms invalidation_time: 248ms vm_write_pc_count: 133,179,978 vm_write_sp_count: 133,179,978 vm_write_locals_count: 129,160,863 vm_write_stack_count: 129,160,863 vm_write_to_parent_iseq_local_count: 693,262 vm_read_from_parent_iseq_local_count: 14,736,626 guard_type_count: 157,425,618 guard_type_exit_ratio: 4.4% guard_shape_count: 64,005,824 guard_shape_exit_ratio: 10.7% code_region_bytes: 29,147,136 zjit_alloc_bytes: 44,468,338 total_mem_bytes: 73,615,474 side_exit_count: 15,409,202 total_insn_count: 934,468,730 vm_insn_count: 166,726,703 zjit_insn_count: 767,742,027 ratio_in_zjit: 82.2% ```
after patch ``` Average of last 10, non-warmup iters: 648ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (59.5% of total 15,571,939): Hash#fetch: 3,185,114 (20.5%) Regexp#match?: 708,795 ( 4.6%) Hash#key?: 696,422 ( 4.5%) String#sub!: 489,841 ( 3.1%) Set#include?: 396,625 ( 2.5%) String#<<: 396,279 ( 2.5%) String#start_with?: 370,465 ( 2.4%) Hash#delete: 325,992 ( 2.1%) String.new: 307,248 ( 2.0%) Integer#===: 277,929 ( 1.8%) Symbol#end_with?: 255,540 ( 1.6%) Kernel#is_a?: 246,961 ( 1.6%) Process.clock_gettime: 221,588 ( 1.4%) Integer#>: 219,718 ( 1.4%) String#match?: 218,057 ( 1.4%) Integer#<=: 202,617 ( 1.3%) Time#to_i: 192,214 ( 1.2%) Time#subsec: 189,240 ( 1.2%) String#to_sym: 185,593 ( 1.2%) String#include?: 182,863 ( 1.2%) Top-20 calls to C functions from JIT code (83.7% of total 126,248,940): rb_vm_opt_send_without_block: 36,875,422 (29.2%) rb_vm_send: 10,068,311 ( 8.0%) rb_vm_env_write: 8,529,572 ( 6.8%) rb_hash_aref: 8,014,184 ( 6.3%) rb_zjit_writebarrier_check_immediate: 7,697,776 ( 6.1%) rb_vm_getinstancevariable: 5,934,206 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,759,185 ( 3.8%) rb_obj_is_kind_of: 3,745,913 ( 3.0%) rb_vm_invokesuper: 2,663,429 ( 2.1%) rb_hash_aset: 2,416,112 ( 1.9%) rb_vm_setinstancevariable: 2,361,107 ( 1.9%) rb_vm_opt_getconstant_path: 2,294,768 ( 1.8%) Hash#fetch: 1,779,524 ( 1.4%) fetch: 1,405,590 ( 1.1%) rb_vm_invokeblock: 1,385,975 ( 1.1%) rb_str_buf_append: 1,369,179 ( 1.1%) rb_ec_ary_new_from_values: 1,336,806 ( 1.1%) rb_class_allocate_instance: 1,281,533 ( 1.0%) rb_hash_new_with_size: 899,857 ( 0.7%) rb_vm_sendforward: 798,572 ( 0.6%) Top-2 not optimized method types for send (100.0% of total 4,889,758): iseq: 4,886,936 (99.9%) null: 2,822 ( 0.1%) Top-3 not optimized method types for send_without_block (100.0% of total 525,350): optimized_send: 478,875 (91.2%) null: 42,176 ( 8.0%) optimized_block_call: 4,299 ( 0.8%) Top-3 not optimized method types for super (100.0% of total 2,350,289): cfunc: 
2,239,565 (95.3%) alias: 107,374 ( 4.6%) attrset: 3,350 ( 0.1%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,298,609): invokeblock: 1,385,975 (60.3%) sendforward: 798,572 (34.7%) invokesuperforward: 81,666 ( 3.6%) opt_send_without_block: 32,396 ( 1.4%) Top-20 send fallback reasons (99.9% of total 51,873,375): send_without_block_polymorphic: 18,540,291 (35.7%) singleton_class_seen: 9,210,394 (17.8%) send_without_block_no_profiles: 7,202,051 (13.9%) send_not_optimized_method_type: 4,889,758 ( 9.4%) send_no_profiles: 2,882,602 ( 5.6%) super_not_optimized_method_type: 2,350,289 ( 4.5%) uncategorized: 2,298,609 ( 4.4%) one_or_more_complex_arg_pass: 1,543,404 ( 3.0%) send_without_block_megamorphic: 723,037 ( 1.4%) send_polymorphic: 544,570 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 483,174 ( 0.9%) send_without_block_not_optimized_need_permission: 389,384 ( 0.8%) too_many_args_for_lir: 312,568 ( 0.6%) super_complex_args_pass: 111,054 ( 0.2%) super_target_complex_args_pass: 104,723 ( 0.2%) super_polymorphic: 87,852 ( 0.2%) argc_param_mismatch: 50,382 ( 0.1%) send_without_block_not_optimized_method_type: 42,176 ( 0.1%) obj_to_string_not_string: 34,853 ( 0.1%) send_without_block_direct_keyword_mismatch: 32,436 ( 0.1%) Top-4 setivar fallback reasons (100.0% of total 2,361,107): not_monomorphic: 2,138,392 (90.6%) not_t_object: 125,163 ( 5.3%) too_complex: 97,531 ( 4.1%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 6,059,319): not_monomorphic: 5,787,746 (95.5%) too_complex: 271,573 ( 4.5%) Top-3 definedivar fallback reasons (100.0% of total 405,302): not_monomorphic: 397,150 (98.0%) too_complex: 5,122 ( 1.3%) not_t_object: 3,030 ( 0.7%) Top-6 invokeblock handler (100.0% of total 1,385,975): monomorphic_iseq: 688,157 (49.7%) polymorphic: 523,861 (37.8%) monomorphic_other: 106,268 ( 7.7%) monomorphic_ifunc: 55,505 ( 4.0%) megamorphic: 6,760 ( 0.5%) no_profiles: 5,424 ( 0.4%) Top-8 popular 
complex argument-parameter features not optimized (100.0% of total 1,850,658): param_forwardable: 685,941 (37.1%) param_block: 641,355 (34.7%) param_rest: 327,046 (17.7%) param_kwrest: 120,209 ( 6.5%) caller_kw_splat: 36,147 ( 2.0%) caller_splat: 34,029 ( 1.8%) caller_blockarg: 5,821 ( 0.3%) caller_kwarg: 110 ( 0.0%) Top-1 compile error reasons (100.0% of total 191,769): exception_handler: 191,769 (100.0%) Top-5 unhandled YARV insns (100.0% of total 7,611): getconstant: 3,318 (43.6%) setblockparam: 2,837 (37.3%) checkmatch: 929 (12.2%) expandarray: 360 ( 4.7%) once: 167 ( 2.2%) Top-3 unhandled HIR insns (100.0% of total 236,976): throw: 198,481 (83.8%) invokebuiltin: 35,774 (15.1%) array_max: 2,721 ( 1.1%) Top-20 side exit reasons (100.0% of total 15,343,302): guard_type_failure: 6,886,972 (44.9%) guard_shape_failure: 6,854,835 (44.7%) block_param_proxy_not_iseq_or_ifunc: 1,008,346 ( 6.6%) unhandled_hir_insn: 236,976 ( 1.5%) compile_error: 191,769 ( 1.2%) fixnum_mult_overflow: 50,739 ( 0.3%) block_param_proxy_modified: 28,119 ( 0.2%) patchpoint_stable_constant_names: 19,858 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,787 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) unhandled_yarv_insn: 7,611 ( 0.0%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) obj_to_string_fallback: 275 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 102 ( 0.0%) send_count: 152,019,764 dynamic_send_count: 51,873,375 (34.1%) optimized_send_count: 100,146,389 (65.9%) dynamic_setivar_count: 2,361,107 ( 1.6%) dynamic_getivar_count: 6,059,319 ( 4.0%) dynamic_definedivar_count: 405,302 ( 0.3%) iseq_optimized_send_count: 40,149,182 (26.4%) inline_cfunc_optimized_send_count: 40,168,875 (26.4%) inline_iseq_optimized_send_count: 3,408,619 ( 2.2%) non_variadic_cfunc_optimized_send_count: 8,896,927 ( 5.9%) 
variadic_cfunc_optimized_send_count: 7,522,786 ( 4.9%) compiled_iseq_count: 5,554 failed_iseq_count: 0 compile_time: 1,784ms profile_time: 13ms gc_time: 19ms invalidation_time: 261ms vm_write_pc_count: 133,027,580 vm_write_sp_count: 133,027,580 vm_write_locals_count: 129,024,228 vm_write_stack_count: 129,024,228 vm_write_to_parent_iseq_local_count: 693,264 vm_read_from_parent_iseq_local_count: 14,727,716 guard_type_count: 157,500,381 guard_type_exit_ratio: 4.4% guard_shape_count: 64,160,894 guard_shape_exit_ratio: 10.7% code_region_bytes: 29,196,288 zjit_alloc_bytes: 44,686,498 total_mem_bytes: 73,882,786 side_exit_count: 15,343,302 total_insn_count: 934,219,385 vm_insn_count: 167,485,651 zjit_insn_count: 766,733,734 ratio_in_zjit: 82.1% ```
### rails-bench
before patch ``` Average of last 10, non-warmup iters: 1146ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (52.4% of total 38,306,776): Hash#key?: 3,141,619 ( 8.2%) Regexp#match?: 2,420,225 ( 6.3%) Hash#fetch: 2,245,557 ( 5.9%) Integer#===: 1,098,163 ( 2.9%) Hash#delete: 1,014,375 ( 2.6%) Array#any?: 1,007,766 ( 2.6%) String.new: 1,004,713 ( 2.6%) String#b: 797,913 ( 2.1%) String#to_sym: 680,943 ( 1.8%) Array#all?: 650,132 ( 1.7%) Fiber.current: 649,003 ( 1.7%) Array#join: 641,038 ( 1.7%) Array#include?: 613,837 ( 1.6%) Kernel#Array: 610,311 ( 1.6%) String#<<: 606,240 ( 1.6%) Symbol#end_with?: 598,807 ( 1.6%) String#force_encoding: 593,535 ( 1.5%) Kernel#dup: 580,051 ( 1.5%) Array#[]: 562,360 ( 1.5%) Kernel#respond_to?: 550,441 ( 1.4%) Top-20 calls to C functions from JIT code (75.5% of total 262,197,810): rb_vm_opt_send_without_block: 54,534,682 (20.8%) rb_hash_aref: 22,920,285 ( 8.7%) rb_vm_env_write: 19,385,633 ( 7.4%) rb_vm_send: 17,070,477 ( 6.5%) rb_zjit_writebarrier_check_immediate: 13,780,973 ( 5.3%) rb_vm_getinstancevariable: 12,379,513 ( 4.7%) rb_ivar_get_at_no_ractor_check: 12,156,906 ( 4.6%) rb_vm_invokesuper: 8,086,665 ( 3.1%) rb_hash_aset: 5,043,536 ( 1.9%) rb_obj_is_kind_of: 4,431,123 ( 1.7%) rb_vm_invokeblock: 4,036,483 ( 1.5%) Hash#key?: 3,141,619 ( 1.2%) rb_vm_opt_getconstant_path: 3,053,319 ( 1.2%) rb_class_allocate_instance: 2,878,526 ( 1.1%) rb_hash_new_with_size: 2,823,745 ( 1.1%) rb_ec_ary_new_from_values: 2,585,553 ( 1.0%) rb_str_concat_literals: 2,450,764 ( 0.9%) Regexp#match?: 2,420,225 ( 0.9%) rb_obj_alloc: 2,419,171 ( 0.9%) rb_vm_setinstancevariable: 2,357,067 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 8,550,760): iseq: 8,518,289 (99.6%) optimized: 32,471 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 789,641): optimized_send: 606,885 (76.9%) null: 182,756 (23.1%) Top-2 not optimized method types for super (100.0% of total 6,689,859): cfunc: 6,640,180 
(99.3%) attrset: 49,679 ( 0.7%) Top-3 instructions with uncategorized fallback reason (100.0% of total 5,962,039): invokeblock: 4,036,483 (67.7%) sendforward: 1,871,601 (31.4%) opt_send_without_block: 53,955 ( 0.9%) Top-20 send fallback reasons (100.0% of total 85,599,908): send_without_block_polymorphic: 31,804,276 (37.2%) send_without_block_no_profiles: 13,349,825 (15.6%) send_not_optimized_method_type: 8,550,760 (10.0%) super_not_optimized_method_type: 6,689,859 ( 7.8%) uncategorized: 5,962,039 ( 7.0%) send_no_profiles: 5,200,278 ( 6.1%) one_or_more_complex_arg_pass: 4,198,502 ( 4.9%) send_polymorphic: 3,318,658 ( 3.9%) send_without_block_not_optimized_need_permission: 1,274,177 ( 1.5%) too_many_args_for_lir: 1,139,487 ( 1.3%) singleton_class_seen: 1,101,973 ( 1.3%) super_complex_args_pass: 829,842 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 606,885 ( 0.7%) send_without_block_megamorphic: 565,874 ( 0.7%) super_target_complex_args_pass: 414,600 ( 0.5%) send_without_block_not_optimized_method_type: 182,756 ( 0.2%) obj_to_string_not_string: 158,141 ( 0.2%) super_call_with_block: 100,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 99,588 ( 0.1%) super_polymorphic: 52,360 ( 0.1%) Top-2 setivar fallback reasons (100.0% of total 2,357,067): not_monomorphic: 2,255,283 (95.7%) not_t_object: 101,784 ( 4.3%) Top-1 getivar fallback reasons (100.0% of total 12,379,538): not_monomorphic: 12,379,538 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 350,548): not_monomorphic: 350,461 (100.0%) not_t_object: 87 ( 0.0%) Top-6 invokeblock handler (100.0% of total 4,036,483): monomorphic_iseq: 2,189,057 (54.2%) polymorphic: 1,207,002 (29.9%) monomorphic_other: 334,248 ( 8.3%) monomorphic_ifunc: 221,225 ( 5.5%) megamorphic: 84,439 ( 2.1%) no_profiles: 512 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 5,212,154): param_forwardable: 1,824,953 (35.0%) param_block: 1,792,214 (34.4%) param_rest: 861,894 
(16.5%) caller_splat: 283,669 ( 5.4%) caller_kw_splat: 248,291 ( 4.8%) param_kwrest: 200,208 ( 3.8%) caller_blockarg: 752 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 391,562): exception_handler: 391,562 (100.0%) Top-6 unhandled YARV insns (100.0% of total 1,000,531): invokesuperforward: 498,993 (49.9%) getconstant: 400,945 (40.1%) expandarray: 49,985 ( 5.0%) setblockparam: 49,972 ( 5.0%) checkmatch: 480 ( 0.0%) once: 156 ( 0.0%) Top-2 unhandled HIR insns (100.0% of total 268,151): throw: 232,560 (86.7%) invokebuiltin: 35,591 (13.3%) Top-19 side exit reasons (100.0% of total 8,709,784): guard_shape_failure: 2,497,335 (28.7%) block_param_proxy_not_iseq_or_ifunc: 1,988,408 (22.8%) guard_type_failure: 1,722,007 (19.8%) unhandled_yarv_insn: 1,000,531 (11.5%) compile_error: 391,562 ( 4.5%) unhandled_newarray_send_pack: 298,017 ( 3.4%) unhandled_hir_insn: 268,151 ( 3.1%) patchpoint_method_redefined: 200,632 ( 2.3%) unhandled_block_arg: 151,295 ( 1.7%) block_param_proxy_modified: 124,245 ( 1.4%) guard_less_failure: 50,126 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.1%) patchpoint_stable_constant_names: 6,350 ( 0.1%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 405 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 42 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 329,199,237 dynamic_send_count: 85,599,908 (26.0%) optimized_send_count: 243,599,329 (74.0%) dynamic_setivar_count: 2,357,067 ( 0.7%) dynamic_getivar_count: 12,379,538 ( 3.8%) dynamic_definedivar_count: 350,548 ( 0.1%) iseq_optimized_send_count: 93,946,576 (28.5%) inline_cfunc_optimized_send_count: 97,478,983 (29.6%) inline_iseq_optimized_send_count: 9,138,886 ( 2.8%) non_variadic_cfunc_optimized_send_count: 25,367,116 ( 7.7%) variadic_cfunc_optimized_send_count: 17,667,768 ( 5.4%) compiled_iseq_count: 2,888 failed_iseq_count: 0 compile_time: 876ms profile_time: 28ms gc_time: 6ms invalidation_time: 8ms 
vm_write_pc_count: 287,051,837 vm_write_sp_count: 287,051,837 vm_write_locals_count: 273,948,883 vm_write_stack_count: 273,948,883 vm_write_to_parent_iseq_local_count: 1,079,877 vm_read_from_parent_iseq_local_count: 30,814,984 guard_type_count: 310,888,965 guard_type_exit_ratio: 0.6% guard_shape_count: 108,669,058 guard_shape_exit_ratio: 2.3% code_region_bytes: 14,352,384 zjit_alloc_bytes: 18,992,674 total_mem_bytes: 33,345,058 side_exit_count: 8,709,784 total_insn_count: 1,705,856,454 vm_insn_count: 122,246,885 zjit_insn_count: 1,583,609,569 ratio_in_zjit: 92.8% ```
after patch ``` Average of last 10, non-warmup iters: 1072ms ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (52.5% of total 38,239,504): Hash#key?: 3,141,619 ( 8.2%) Regexp#match?: 2,420,215 ( 6.3%) Hash#fetch: 2,245,557 ( 5.9%) Integer#===: 1,097,515 ( 2.9%) Hash#delete: 1,014,375 ( 2.7%) Array#any?: 1,007,756 ( 2.6%) String.new: 1,004,713 ( 2.6%) String#b: 797,913 ( 2.1%) String#to_sym: 680,943 ( 1.8%) Array#all?: 650,132 ( 1.7%) Fiber.current: 649,003 ( 1.7%) Array#join: 641,038 ( 1.7%) Array#include?: 613,837 ( 1.6%) Kernel#Array: 610,311 ( 1.6%) String#<<: 606,240 ( 1.6%) Symbol#end_with?: 598,807 ( 1.6%) String#force_encoding: 593,535 ( 1.6%) Kernel#dup: 580,051 ( 1.5%) Array#[]: 562,360 ( 1.5%) Kernel#respond_to?: 550,441 ( 1.4%) Top-20 calls to C functions from JIT code (75.4% of total 262,218,592): rb_vm_opt_send_without_block: 54,249,429 (20.7%) rb_hash_aref: 22,920,271 ( 8.7%) rb_vm_env_write: 19,385,609 ( 7.4%) rb_vm_send: 17,070,463 ( 6.5%) rb_zjit_writebarrier_check_immediate: 13,780,893 ( 5.3%) rb_vm_getinstancevariable: 12,322,924 ( 4.7%) rb_ivar_get_at_no_ractor_check: 12,156,898 ( 4.6%) rb_vm_invokesuper: 8,086,659 ( 3.1%) rb_hash_aset: 5,043,532 ( 1.9%) rb_obj_is_kind_of: 4,474,826 ( 1.7%) rb_vm_invokeblock: 4,036,471 ( 1.5%) Hash#key?: 3,141,619 ( 1.2%) rb_vm_opt_getconstant_path: 3,053,286 ( 1.2%) rb_class_allocate_instance: 2,878,505 ( 1.1%) rb_hash_new_with_size: 2,823,748 ( 1.1%) rb_ec_ary_new_from_values: 2,585,561 ( 1.0%) rb_str_concat_literals: 2,450,756 ( 0.9%) Regexp#match?: 2,420,215 ( 0.9%) rb_obj_alloc: 2,419,146 ( 0.9%) rb_vm_setinstancevariable: 2,357,065 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 8,550,755): iseq: 8,518,284 (99.6%) optimized: 32,471 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 789,641): optimized_send: 606,885 (76.9%) null: 182,756 (23.1%) Top-2 not optimized method types for super (100.0% of total 6,689,853): cfunc: 6,640,178 
(99.3%) attrset: 49,675 ( 0.7%) Top-4 instructions with uncategorized fallback reason (100.0% of total 6,461,020): invokeblock: 4,036,471 (62.5%) sendforward: 1,871,601 (29.0%) invokesuperforward: 498,993 ( 7.7%) opt_send_without_block: 53,955 ( 0.8%) Top-20 send fallback reasons (100.0% of total 85,813,616): send_without_block_polymorphic: 31,519,543 (36.7%) send_without_block_no_profiles: 13,349,751 (15.6%) send_not_optimized_method_type: 8,550,755 (10.0%) super_not_optimized_method_type: 6,689,853 ( 7.8%) uncategorized: 6,461,020 ( 7.5%) send_no_profiles: 5,200,273 ( 6.1%) one_or_more_complex_arg_pass: 4,198,498 ( 4.9%) send_polymorphic: 3,318,658 ( 3.9%) send_without_block_not_optimized_need_permission: 1,273,739 ( 1.5%) too_many_args_for_lir: 1,139,487 ( 1.3%) singleton_class_seen: 1,101,973 ( 1.3%) super_complex_args_pass: 829,842 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 606,885 ( 0.7%) send_without_block_megamorphic: 565,874 ( 0.7%) super_target_complex_args_pass: 414,600 ( 0.5%) send_without_block_not_optimized_method_type: 182,756 ( 0.2%) obj_to_string_not_string: 158,133 ( 0.2%) super_call_with_block: 100,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 99,588 ( 0.1%) super_polymorphic: 52,360 ( 0.1%) Top-2 setivar fallback reasons (100.0% of total 2,357,065): not_monomorphic: 2,255,281 (95.7%) not_t_object: 101,784 ( 4.3%) Top-1 getivar fallback reasons (100.0% of total 12,322,949): not_monomorphic: 12,322,949 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 350,548): not_monomorphic: 350,461 (100.0%) not_t_object: 87 ( 0.0%) Top-6 invokeblock handler (100.0% of total 4,036,471): monomorphic_iseq: 2,189,045 (54.2%) polymorphic: 1,207,002 (29.9%) monomorphic_other: 334,248 ( 8.3%) monomorphic_ifunc: 221,225 ( 5.5%) megamorphic: 84,439 ( 2.1%) no_profiles: 512 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 5,212,150): param_forwardable: 1,824,953 (35.0%) param_block: 
1,792,214 (34.4%) param_rest: 861,894 (16.5%) caller_splat: 283,669 ( 5.4%) caller_kw_splat: 248,291 ( 4.8%) param_kwrest: 200,208 ( 3.8%) caller_blockarg: 748 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 391,562): exception_handler: 391,562 (100.0%) Top-5 unhandled YARV insns (100.0% of total 501,538): getconstant: 400,945 (79.9%) expandarray: 49,985 (10.0%) setblockparam: 49,972 (10.0%) checkmatch: 480 ( 0.1%) once: 156 ( 0.0%) Top-2 unhandled HIR insns (100.0% of total 268,152): throw: 232,560 (86.7%) invokebuiltin: 35,592 (13.3%) Top-19 side exit reasons (100.0% of total 8,210,699): guard_shape_failure: 2,497,552 (30.4%) block_param_proxy_not_iseq_or_ifunc: 1,988,408 (24.2%) guard_type_failure: 1,721,809 (21.0%) unhandled_yarv_insn: 501,538 ( 6.1%) compile_error: 391,562 ( 4.8%) unhandled_newarray_send_pack: 298,017 ( 3.6%) unhandled_hir_insn: 268,152 ( 3.3%) patchpoint_method_redefined: 200,632 ( 2.4%) unhandled_block_arg: 151,295 ( 1.8%) block_param_proxy_modified: 124,245 ( 1.5%) guard_less_failure: 50,033 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.1%) patchpoint_stable_constant_names: 6,342 ( 0.1%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 405 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 31 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 328,805,013 dynamic_send_count: 85,813,616 (26.1%) optimized_send_count: 242,991,397 (73.9%) dynamic_setivar_count: 2,357,065 ( 0.7%) dynamic_getivar_count: 12,322,949 ( 3.7%) dynamic_definedivar_count: 350,548 ( 0.1%) iseq_optimized_send_count: 93,990,621 (28.6%) inline_cfunc_optimized_send_count: 96,851,696 (29.5%) inline_iseq_optimized_send_count: 9,181,467 ( 2.8%) non_variadic_cfunc_optimized_send_count: 25,304,458 ( 7.7%) variadic_cfunc_optimized_send_count: 17,663,155 ( 5.4%) compiled_iseq_count: 2,886 failed_iseq_count: 0 compile_time: 875ms profile_time: 27ms gc_time: 66ms invalidation_time: 9ms vm_write_pc_count: 
287,186,308 vm_write_sp_count: 287,186,308 vm_write_locals_count: 274,139,228 vm_write_stack_count: 274,139,228 vm_write_to_parent_iseq_local_count: 1,079,877 vm_read_from_parent_iseq_local_count: 30,810,378 guard_type_count: 310,644,961 guard_type_exit_ratio: 0.6% guard_shape_count: 109,072,242 guard_shape_exit_ratio: 2.3% code_region_bytes: 14,352,384 zjit_alloc_bytes: 19,186,174 total_mem_bytes: 33,538,558 side_exit_count: 8,210,699 total_insn_count: 1,705,193,555 vm_insn_count: 123,691,343 zjit_insn_count: 1,581,502,212 ratio_in_zjit: 92.7% ```
--- test/ruby/test_zjit.rb | 40 ++++++++++++++++++++++ zjit/src/codegen.rs | 24 +++++++++++++ zjit/src/hir.rs | 58 ++++++++++++++++++++++++++++++++ zjit/src/hir/tests.rs | 76 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 196 insertions(+), 2 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 2066610cb27be2..6ad06f9453e9d3 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1496,6 +1496,46 @@ def test = Child.new.foo(1) }, call_threshold: 2 end + def test_invokesuperforward + assert_compiles '[1, 2, 3]', %q{ + class A + def foo(a,b,c) = [a,b,c] + end + + class B < A + def foo(...) = super + end + + def test + B.new.foo(1, 2, 3) + end + + test + test + }, call_threshold: 2 + end + + def test_invokesuperforward_with_args_kwargs_and_block + assert_compiles '[[1, 2], {x: 3}, 4]', %q{ + class A + def foo(*args, **kwargs, &block) + [args, kwargs, block&.call] + end + end + + class B < A + def foo(...) = super + end + + def test + B.new.foo(1, 2, x: 3) { 4 } + end + + test + test + }, call_threshold: 2 + end + def test_send_with_non_constant_keyword_default assert_compiles '[[2, 4, 16], [10, 4, 16], [2, 20, 16], [2, 4, 30], [10, 20, 30]]', %q{ def dbl(x = 1) = x * 2 diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 8714518866c3b4..a77bd7debd0dfc 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -481,6 +481,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), Insn::SendWithoutBlockDirect { cme, iseq, recv, args, kw_bits, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), None), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. 
} => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), + &Insn::InvokeSuperForward { cd, blockiseq, state, reason, .. } => gen_invokesuperforward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), Insn::InvokeProc { recv, args, state, kw_splat } => gen_invokeproc(jit, asm, opnd!(recv), opnds!(args), *kw_splat, &function.frame_state(*state)), // Ensure we have enough room fit ec, self, and arguments @@ -1638,6 +1639,29 @@ fn gen_invokesuper( ) } +/// Compile a dynamic dispatch for `super` with `...` +fn gen_invokesuperforward( + jit: &mut JITState, + asm: &mut Assembler, + cd: *const rb_call_data, + blockiseq: IseqPtr, + state: &FrameState, + reason: SendFallbackReason, +) -> lir::Opnd { + gen_incr_send_fallback_counter(asm, reason); + + gen_prepare_non_leaf_call(jit, asm, state); + asm_comment!(asm, "call super with dynamic dispatch (forwarding)"); + unsafe extern "C" { + fn rb_vm_invokesuperforward(ec: EcPtr, cfp: CfpPtr, cd: VALUE, blockiseq: IseqPtr) -> VALUE; + } + asm_ccall!( + asm, + rb_vm_invokesuperforward, + EC, CFP, Opnd::const_ptr(cd), VALUE::from(blockiseq).into() + ) +} + /// Compile a string resurrection fn gen_string_copy(asm: &mut Assembler, recv: Opnd, chilled: bool, state: &FrameState) -> Opnd { // TODO: split rb_ec_str_resurrect into separate functions diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 79004c8737e4ee..51ab45937cb5d2 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -936,6 +936,14 @@ pub enum Insn { state: InsnId, reason: SendFallbackReason, }, + InvokeSuperForward { + recv: InsnId, + cd: *const rb_call_data, + blockiseq: IseqPtr, + args: Vec, + state: InsnId, + reason: SendFallbackReason, + }, InvokeBlock { cd: *const rb_call_data, args: Vec, @@ -1183,6 +1191,7 @@ impl Insn { Insn::Send { .. } => effects::Any, Insn::SendForward { .. 
} => effects::Any, Insn::InvokeSuper { .. } => effects::Any, + Insn::InvokeSuperForward { .. } => effects::Any, Insn::InvokeBlock { .. } => effects::Any, Insn::SendWithoutBlockDirect { .. } => effects::Any, Insn::InvokeBuiltin { .. } => effects::Any, @@ -1471,6 +1480,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { write!(f, " # SendFallbackReason: {reason}")?; Ok(()) } + Insn::InvokeSuperForward { recv, blockiseq, args, reason, .. } => { + write!(f, "InvokeSuperForward {recv}, {:p}", self.ptr_map.map_ptr(blockiseq))?; + for arg in args { + write!(f, ", {arg}")?; + } + write!(f, " # SendFallbackReason: {reason}")?; + Ok(()) + } Insn::InvokeBlock { args, reason, .. } => { write!(f, "InvokeBlock")?; for arg in args { @@ -2277,6 +2294,14 @@ impl Function { state, reason, }, + &InvokeSuperForward { recv, cd, blockiseq, ref args, state, reason } => InvokeSuperForward { + recv: find!(recv), + cd, + blockiseq, + args: find_vec!(args), + state, + reason, + }, &InvokeBlock { cd, ref args, state, reason } => InvokeBlock { cd, args: find_vec!(args), @@ -2356,6 +2381,7 @@ impl Function { | SendForward { reason, .. } | SendWithoutBlock { reason, .. } | InvokeSuper { reason, .. } + | InvokeSuperForward { reason, .. } | InvokeBlock { reason, .. } => *reason = dynamic_send_reason, _ => unreachable!("unexpected instruction {} at {insn_id}", self.find(insn_id)) @@ -2477,6 +2503,7 @@ impl Function { Insn::Send { .. } => types::BasicObject, Insn::SendForward { .. } => types::BasicObject, Insn::InvokeSuper { .. } => types::BasicObject, + Insn::InvokeSuperForward { .. } => types::BasicObject, Insn::InvokeBlock { .. } => types::BasicObject, Insn::InvokeProc { .. } => types::BasicObject, Insn::InvokeBuiltin { return_type, .. } => return_type.unwrap_or(types::BasicObject), @@ -4592,6 +4619,7 @@ impl Function { | &Insn::SendWithoutBlockDirect { recv, ref args, state, .. } | &Insn::InvokeBuiltin { recv, ref args, state, .. } | &Insn::InvokeSuper { recv, ref args, state, .. 
} + | &Insn::InvokeSuperForward { recv, ref args, state, .. } | &Insn::InvokeProc { recv, ref args, state, .. } => { worklist.push_back(recv); worklist.extend(args); @@ -5252,6 +5280,7 @@ impl Function { | Insn::Send { recv, ref args, .. } | Insn::SendForward { recv, ref args, .. } | Insn::InvokeSuper { recv, ref args, .. } + | Insn::InvokeSuperForward { recv, ref args, .. } | Insn::CCallWithFrame { recv, ref args, .. } | Insn::CCallVariadic { recv, ref args, .. } | Insn::InvokeBuiltin { recv, ref args, .. } @@ -6827,6 +6856,35 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } } } + YARVINSN_invokesuperforward => { + let cd: *const rb_call_data = get_arg(pc, 0).as_ptr(); + let blockiseq: IseqPtr = get_arg(pc, 1).as_iseq(); + let call_info = unsafe { rb_get_call_data_ci(cd) }; + let flags = unsafe { rb_vm_ci_flag(call_info) }; + let forwarding = (flags & VM_CALL_FORWARDING) != 0; + if let Err(call_type) = unhandled_call_type(flags) { + // Can't handle tailcall; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledCallType(call_type) }); + break; // End the block + } + let argc = unsafe { vm_ci_argc((*cd).ci) }; + let args = state.stack_pop_n(argc as usize + usize::from(forwarding))?; + let recv = state.stack_pop()?; + let result = fun.push_insn(block, Insn::InvokeSuperForward { recv, cd, blockiseq, args, state: exit_id, reason: Uncategorized(opcode) }); + state.stack_push(result); + + if !blockiseq.is_null() { + // Reload locals that may have been modified by the blockiseq. + // TODO: Avoid reloading locals that are not referenced by the blockiseq + // or not used after this. Max thinks we could eventually DCE them. 
+ for local_idx in 0..state.locals.len() { + let ep_offset = local_idx_to_ep_offset(iseq, local_idx) as u32; + // TODO: We could use `use_sp: true` with PatchPoint + let val = fun.push_insn(block, Insn::GetLocal { ep_offset, level: 0, use_sp: false, rest_param: false }); + state.setlocal(ep_offset, val); + } + } + } YARVINSN_invokeblock => { let cd: *const rb_call_data = get_arg(pc, 0).as_ptr(); let call_info = unsafe { rb_get_call_data_ci(cd) }; diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 56f1928f1fa753..c21402449f52fb 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -1843,7 +1843,7 @@ pub mod hir_build_tests { } #[test] - fn test_cant_compile_super_forward() { + fn test_compile_super_forward() { eval(" def test(...) = super(...) "); @@ -1858,7 +1858,79 @@ pub mod hir_build_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - SideExit UnhandledYARVInsn(invokesuperforward) + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_compile_super_forward_with_block() { + eval(" + def test(...) = super { |x| x } + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + v16:BasicObject = GetLocal :..., l0, EP@3 + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_compile_super_forward_with_use() { + eval(" + def test(...) = super(...) 
+ 1 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v15:BasicObject = InvokeSuperForward v8, 0x1000, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + v17:Fixnum[1] = Const Value(1) + v20:BasicObject = SendWithoutBlock v15, :+, v17 # SendFallbackReason: Uncategorized(opt_plus) + CheckInterrupts + Return v20 + "); + } + + #[test] + fn test_compile_super_forward_with_arg() { + eval(" + def test(...) = super(1, ...) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :..., l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[1] = Const Value(1) + v17:BasicObject = InvokeSuperForward v8, 0x1000, v14, v9 # SendFallbackReason: Uncategorized(invokesuperforward) + CheckInterrupts + Return v17 "); } From 39b28e67a6363f9ffe1f478298984414083c96d4 Mon Sep 17 00:00:00 2001 From: Randy Stauner Date: Tue, 27 Jan 2026 13:00:33 -0700 Subject: [PATCH 24/77] ZJIT: Remove unused import to eliminate build warning (#15984) --- zjit/src/backend/arm64/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 6ed855ddf9c688..ee15627d898dd9 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1717,7 +1717,6 @@ mod tests { use super::*; use insta::assert_snapshot; - use crate::hir; static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG]; From c983b7aee631db46002ba7438089d792c2b0298b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 27 Jan 2026 14:55:09 -0500 Subject: [PATCH 25/77] [ruby/prism] Rename 
line_to_byte_offset -> byte_offset Also, include the column in here. Hopefully we can do some additional optimizations later. https://github.com/ruby/prism/commit/7759acdd26 --- lib/prism/lex_compat.rb | 8 ++-- lib/prism/parse_result.rb | 14 +++---- prism/templates/lib/prism/node.rb.erb | 5 +-- test/prism/ruby/source_test.rb | 60 ++++++++++++++------------- 4 files changed, 46 insertions(+), 41 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 523ad39586b4be..4960230bcf1499 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -816,7 +816,7 @@ def result # Manually implemented instead of `sort_by!(&:location)` for performance. tokens.sort_by! do |token| line, column = token.location - source.line_to_byte_offset(line) + column + source.byte_offset(line, column) end # Add :on_sp tokens @@ -833,8 +833,10 @@ def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token) tokens.each do |token| line, column = token.location - start_offset = source.line_to_byte_offset(line) + column - # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset + start_offset = source.byte_offset(line, column) + + # Ripper reports columns on line 1 without counting the BOM, so we + # adjust to get the real offset start_offset += 3 if line == 1 && bom if start_offset > prev_token_end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 12d19da5629a76..be1c13f97c917d 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -76,13 +76,13 @@ def slice(byte_offset, length) source.byteslice(byte_offset, length) or raise end - # Converts the line number to a byte offset corresponding to the start of that line - def line_to_byte_offset(line) - l = line - @start_line - if l < 0 || l >= offsets.size - raise ArgumentError, "line #{line} is out of range" - end - offsets[l] + # Converts the line number and column in bytes to a byte offset. 
+ def byte_offset(line, column) + normal = line - @start_line + raise IndexError if normal < 0 + offsets.fetch(normal) + column + rescue IndexError + raise ArgumentError, "line #{line} is out of range" end # Binary search through the offsets to find the line number for the given diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb index 8225bfb328e8da..6f8e8b0accabc8 100644 --- a/prism/templates/lib/prism/node.rb.erb +++ b/prism/templates/lib/prism/node.rb.erb @@ -183,14 +183,13 @@ module Prism def tunnel(line, column) queue = [self] #: Array[Prism::node] result = [] #: Array[Prism::node] - - search_offset = source.line_to_byte_offset(line) + column + offset = source.byte_offset(line, column) while (node = queue.shift) result << node node.each_child_node do |child_node| - if child_node.start_offset <= search_offset && search_offset < child_node.end_offset + if child_node.start_offset <= offset && offset < child_node.end_offset queue << child_node break end diff --git a/test/prism/ruby/source_test.rb b/test/prism/ruby/source_test.rb index afd2825765f7e0..f7cf4fe83a2ec3 100644 --- a/test/prism/ruby/source_test.rb +++ b/test/prism/ruby/source_test.rb @@ -4,44 +4,48 @@ module Prism class SourceTest < TestCase - def test_line_to_byte_offset - parse_result = Prism.parse(<<~SRC) + def test_byte_offset + source = Prism.parse(<<~SRC).source abcd efgh ijkl SRC - source = parse_result.source - - assert_equal 0, source.line_to_byte_offset(1) - assert_equal 5, source.line_to_byte_offset(2) - assert_equal 10, source.line_to_byte_offset(3) - assert_equal 15, source.line_to_byte_offset(4) - e = assert_raise(ArgumentError) { source.line_to_byte_offset(5) } - assert_equal "line 5 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(0) } - assert_equal "line 0 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(-1) } - assert_equal "line -1 is out of range", e.message + 
+ assert_equal 0, source.byte_offset(1, 0) + assert_equal 5, source.byte_offset(2, 0) + assert_equal 10, source.byte_offset(3, 0) + assert_equal 15, source.byte_offset(4, 0) + + error = assert_raise(ArgumentError) { source.byte_offset(5, 0) } + assert_equal "line 5 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(0, 0) } + assert_equal "line 0 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(-1, 0) } + assert_equal "line -1 is out of range", error.message end - def test_line_to_byte_offset_with_start_line - parse_result = Prism.parse(<<~SRC, line: 11) + def test_byte_offset_with_start_line + source = Prism.parse(<<~SRC, line: 11).source abcd efgh ijkl SRC - source = parse_result.source - - assert_equal 0, source.line_to_byte_offset(11) - assert_equal 5, source.line_to_byte_offset(12) - assert_equal 10, source.line_to_byte_offset(13) - assert_equal 15, source.line_to_byte_offset(14) - e = assert_raise(ArgumentError) { source.line_to_byte_offset(15) } - assert_equal "line 15 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(10) } - assert_equal "line 10 is out of range", e.message - e = assert_raise(ArgumentError) { source.line_to_byte_offset(9) } - assert_equal "line 9 is out of range", e.message + + assert_equal 0, source.byte_offset(11, 0) + assert_equal 5, source.byte_offset(12, 0) + assert_equal 10, source.byte_offset(13, 0) + assert_equal 15, source.byte_offset(14, 0) + + error = assert_raise(ArgumentError) { source.byte_offset(15, 0) } + assert_equal "line 15 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(10, 0) } + assert_equal "line 10 is out of range", error.message + + error = assert_raise(ArgumentError) { source.byte_offset(9, 0) } + assert_equal "line 9 is out of range", error.message end end end From 68902e3593d3cfeb219164270d633036deb9b240 Mon Sep 17 00:00:00 2001 From: Kevin 
Newton Date: Tue, 27 Jan 2026 15:08:55 -0500 Subject: [PATCH 26/77] [ruby/prism] Add Prism::Node#find_all https://github.com/ruby/prism/commit/51df90ef04 --- prism/templates/lib/prism/node.rb.erb | 22 ++++++++++++++++++- .../prism/result/breadth_first_search_test.rb | 11 ++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb index 6f8e8b0accabc8..d14a06961a577d 100644 --- a/prism/templates/lib/prism/node.rb.erb +++ b/prism/templates/lib/prism/node.rb.erb @@ -200,7 +200,7 @@ module Prism end # Returns the first node that matches the given block when visited in a - # depth-first search. This is useful for finding a node that matches a + # breadth-first search. This is useful for finding a node that matches a # particular condition. # # node.breadth_first_search { |node| node.node_id == node_id } @@ -215,6 +215,26 @@ module Prism nil end + alias find breadth_first_search + + # Returns all of the nodes that match the given block when visited in a + # breadth-first search. This is useful for finding all nodes that match a + # particular condition. + # + # node.breadth_first_search_all { |node| node.is_a?(Prism::CallNode) } + # + def breadth_first_search_all(&block) + queue = [self] #: Array[Prism::node] + results = [] #: Array[Prism::node] + + while (node = queue.shift) + results << node if yield node + queue.concat(node.compact_child_nodes) + end + + results + end + alias find_all breadth_first_search_all # Returns a list of the fields that exist for this node class. Fields # describe the structure of the node. 
This kind of reflection is useful for diff --git a/test/prism/result/breadth_first_search_test.rb b/test/prism/result/breadth_first_search_test.rb index e2e043a902102e..7e7962f1724e47 100644 --- a/test/prism/result/breadth_first_search_test.rb +++ b/test/prism/result/breadth_first_search_test.rb @@ -14,5 +14,16 @@ def test_breadth_first_search refute_nil found assert_equal 8, found.start_offset end + + def test_breadth_first_search_all + result = Prism.parse("[1 + 2, 2]") + found_nodes = + result.value.breadth_first_search_all do |node| + node.is_a?(IntegerNode) + end + + assert_equal 3, found_nodes.size + assert_equal 8, found_nodes[0].start_offset + end end end From ec154654a99c07d065108e9c31793eb9ccbd9ad0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 27 Jan 2026 15:20:36 -0500 Subject: [PATCH 27/77] [ruby/prism] Bump to v1.9.0 https://github.com/ruby/prism/commit/e722e577ef --- lib/prism/prism.gemspec | 2 +- prism/extension.h | 2 +- prism/templates/lib/prism/serialize.rb.erb | 2 +- prism/version.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 8c9b140f0e342b..20c66a562e9d32 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |spec| spec.name = "prism" - spec.version = "1.8.0" + spec.version = "1.9.0" spec.authors = ["Shopify"] spec.email = ["ruby@shopify.com"] diff --git a/prism/extension.h b/prism/extension.h index 510faa48e8dfed..4ddc3a7b8617d0 100644 --- a/prism/extension.h +++ b/prism/extension.h @@ -1,7 +1,7 @@ #ifndef PRISM_EXT_NODE_H #define PRISM_EXT_NODE_H -#define EXPECTED_PRISM_VERSION "1.8.0" +#define EXPECTED_PRISM_VERSION "1.9.0" #include #include diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 6902df5c0159d2..2275d685ca7942 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -10,7 +10,7 @@ 
module Prism # The minor version of prism that we are expecting to find in the serialized # strings. - MINOR_VERSION = 8 + MINOR_VERSION = 9 # The patch version of prism that we are expecting to find in the serialized # strings. diff --git a/prism/version.h b/prism/version.h index 0ef7435c1741e7..b95611f96c78e8 100644 --- a/prism/version.h +++ b/prism/version.h @@ -14,7 +14,7 @@ /** * The minor version of the Prism library as an int. */ -#define PRISM_VERSION_MINOR 8 +#define PRISM_VERSION_MINOR 9 /** * The patch version of the Prism library as an int. @@ -24,6 +24,6 @@ /** * The version of the Prism library as a constant string. */ -#define PRISM_VERSION "1.8.0" +#define PRISM_VERSION "1.9.0" #endif From af4a1ca021845837fd7bfb1e1b4b5abd7e336c34 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 26 Jan 2026 21:36:20 -0500 Subject: [PATCH 28/77] Use slices instead of locations In the C API, we want to use slices instead of locations in the AST. In this case a "slice" is effectively the same thing as the location, except it is represented using a 32-bit offset and a 32-bit length. This will cut down on half of the space of all of the locations in the AST. Note that from the Ruby/Java/JavaScript side, this is effectively an invisible change. This only impacts the C/Rust side. 
--- gems/bundled_gems | 2 +- iseq.c | 2 +- lib/prism/translation/parser/compiler.rb | 2 +- lib/prism/translation/parser/lexer.rb | 2 - lib/prism/translation/ripper.rb | 13 +- prism/config.yml | 4 - prism/defines.h | 33 + prism/extension.c | 30 +- prism/parser.h | 14 +- prism/prism.c | 2778 ++++++++--------- prism/prism.h | 3 +- prism/static_literals.c | 18 +- prism/static_literals.h | 6 +- prism/templates/ext/prism/api_node.c.erb | 19 +- prism/templates/include/prism/ast.h.erb | 32 +- .../templates/include/prism/diagnostic.h.erb | 12 +- prism/templates/src/diagnostic.c.erb | 14 +- prism/templates/src/node.c.erb | 14 +- prism/templates/src/prettyprint.c.erb | 8 +- prism/templates/src/serialize.c.erb | 60 +- prism/templates/src/token_type.c.erb | 4 - prism/util/pm_char.c | 8 +- prism/util/pm_char.h | 4 +- prism/util/pm_newline_list.c | 42 +- prism/util/pm_newline_list.h | 23 +- prism/util/pm_strpbrk.c | 20 +- prism_compile.c | 64 +- ruby.c | 8 +- test/prism/errors_test.rb | 8 +- test/prism/result/overlap_test.rb | 9 +- test/prism/result/source_location_test.rb | 8 +- 31 files changed, 1581 insertions(+), 1683 deletions(-) diff --git a/gems/bundled_gems b/gems/bundled_gems index c8414dee7532cc..98a6ca2cea8ea2 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -34,7 +34,7 @@ drb 2.2.3 https://github.com/ruby/drb nkf 0.2.0 https://github.com/ruby/nkf syslog 0.3.0 https://github.com/ruby/syslog csv 3.3.5 https://github.com/ruby/csv -repl_type_completor 0.1.12 https://github.com/ruby/repl_type_completor +repl_type_completor 0.1.12 https://github.com/ruby/repl_type_completor 26b8e964557690c0b539cff8940bcfb1591f1fe6 ostruct 0.6.3 https://github.com/ruby/ostruct pstore 0.2.0 https://github.com/ruby/pstore benchmark 0.5.0 https://github.com/ruby/benchmark diff --git a/iseq.c b/iseq.c index 97047794b1e904..88aa29dce32ecc 100644 --- a/iseq.c +++ b/iseq.c @@ -1142,7 +1142,7 @@ pm_iseq_new_with_opt(pm_scope_node_t *node, VALUE name, VALUE path, VALUE realpa int32_t 
start_line = node->parser->start_line; pm_line_column_t start = pm_newline_list_line_column(&node->parser->newline_list, location->start, start_line); - pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->end, start_line); + pm_line_column_t end = pm_newline_list_line_column(&node->parser->newline_list, location->start + location->length, start_line); rb_code_location_t code_location = (rb_code_location_t) { .beg_pos = { .lineno = (int) start.line, .column = (int) start.column }, diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 88056146036411..bd3618b16289e7 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1767,7 +1767,7 @@ def visit_symbol_node(node) end else parts = - if node.value == "" + if node.value_loc.nil? [] elsif node.value.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 75c48ef667c642..0491e79cd212e6 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -18,8 +18,6 @@ class Lexer # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. - MISSING: nil, - NOT_PROVIDED: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 735217d2e03608..70f72132f64bd9 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -3152,14 +3152,13 @@ def visit_super_node(node) # :foo # ^^^^ def visit_symbol_node(node) - if (opening = node.opening)&.match?(/^%s|['"]:?$/) + if node.value_loc.nil? 
+ bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) bounds(node.value_loc) - content = on_string_content - - if !(value = node.value).empty? - content = on_string_add(content, on_tstring_content(value)) - end - + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) on_dyna_symbol(content) elsif (closing = node.closing) == ":" bounds(node.location) diff --git a/prism/config.yml b/prism/config.yml index 4e5b077a351ff1..4e1560481e9d9e 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -653,10 +653,6 @@ tokens: comment: "a separator between words in a list" - name: __END__ comment: "marker for the point in the file at which the parser should stop" - - name: MISSING - comment: "a token that was expected but not found" - - name: NOT_PROVIDED - comment: "a token that was not present but it is okay" flags: - name: ArgumentsNodeFlags values: diff --git a/prism/defines.h b/prism/defines.h index e31429c7896dc5..c41e6031a3fefc 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -257,4 +257,37 @@ #define PRISM_FALLTHROUGH #endif +/** + * We need to align nodes in the AST to a pointer boundary so that it can be + * safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to + * specify alignment in a compiler-agnostic way. + */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */ + #include + + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) alignas(size) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) alignof(type) +#elif defined(__GNUC__) || defined(__clang__) + /** Specify alignment for a type or variable. */ + #define PRISM_ALIGNAS(size) __attribute__((aligned(size))) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof__(type) +#elif defined(_MSC_VER) + /** Specify alignment for a type or variable. 
*/ + #define PRISM_ALIGNAS(size) __declspec(align(size)) + + /** Get the alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) __alignof(type) +#else + /** Void because this platform does not support specifying alignment. */ + #define PRISM_ALIGNAS(size) + + /** Fallback to sizeof as alignment requirement of a type. */ + #define PRISM_ALIGNOF(type) sizeof(type) +#endif + #endif diff --git a/prism/extension.c b/prism/extension.c index 71c2d91b98d0f0..400546a4ce0364 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -455,23 +455,23 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free * Create a new Location instance from the given parser and bounds. */ static inline VALUE -parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) { - VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) }; +parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) { + VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) }; return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze); } /** * Create a new Location instance from the given parser and location. */ -#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \ - parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start)) +#define PARSER_LOCATION(source, freeze, location) \ + parser_location(source, freeze, location.start, location.length) /** * Build a new Comment instance from the given parser and comment. */ static inline VALUE -parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) { - VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) }; +parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) { + VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) }; VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? 
rb_cPrismEmbDocComment : rb_cPrismInlineComment; return rb_class_new_instance_freeze(1, argv, type, freeze); } @@ -488,7 +488,7 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { comment != NULL; comment = (const pm_comment_t *) comment->node.next ) { - VALUE value = parser_comment(parser, source, freeze, comment); + VALUE value = parser_comment(source, freeze, comment); rb_ary_push(comments, value); } @@ -500,9 +500,9 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) { * Build a new MagicComment instance from the given parser and magic comment. */ static inline VALUE -parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { - VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length); - VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length); +parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) { + VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length); + VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length); VALUE argv[] = { key_loc, value_loc }; return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze); } @@ -519,7 +519,7 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { magic_comment != NULL; magic_comment = (const pm_magic_comment_t *) magic_comment->node.next ) { - VALUE value = parser_magic_comment(parser, source, freeze, magic_comment); + VALUE value = parser_magic_comment(source, freeze, magic_comment); rb_ary_push(magic_comments, value); } @@ -533,10 +533,10 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) { */ static VALUE parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) { - if (parser->data_loc.end == NULL) { 
+ if (parser->data_loc.length == 0) { return Qnil; } else { - return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc); + return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length); } } @@ -554,7 +554,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo ) { VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding)); - VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location); + VALUE location = PARSER_LOCATION(source, freeze, error->location); VALUE level = Qnil; switch (error->level) { @@ -594,7 +594,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, ) { VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id))); VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding)); - VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location); + VALUE location = PARSER_LOCATION(source, freeze, warning->location); VALUE level = Qnil; switch (warning->level) { diff --git a/prism/parser.h b/prism/parser.h index 95d7aac7108c98..a8d840d3bfd5b2 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -479,17 +479,11 @@ typedef struct { /** The embedded base node. */ pm_list_node_t node; - /** A pointer to the start of the key in the source. */ - const uint8_t *key_start; + /** The key of the magic comment. */ + pm_location_t key; - /** A pointer to the start of the value in the source. */ - const uint8_t *value_start; - - /** The length of the key in the source. */ - uint32_t key_length; - - /** The length of the value in the source. */ - uint32_t value_length; + /** The value of the magic comment. 
*/ + pm_location_t value; } pm_magic_comment_t; /** diff --git a/prism/prism.c b/prism/prism.c index b158e505b2dc82..2c039612850e92 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -19,22 +19,49 @@ pm_version(void) { #define MAX(a,b) (((a)>(b))?(a):(b)) /******************************************************************************/ -/* Helpful AST-related macros */ +/* Helpful AST-related macros */ /******************************************************************************/ +#define U32(value_) ((uint32_t) (value_)) + #define FL PM_NODE_FLAGS #define UP PM_NODE_UPCAST -#define PM_TOKEN_START(token_) ((token_)->start) -#define PM_TOKEN_END(token_) ((token_)->end) +#define PM_LOCATION_START(location_) ((location_)->start) +#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length) + +#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start) +#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start) +#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start) +#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start) #define PM_NODE_START(node_) (UP(node_)->location.start) -#define PM_NODE_END(node_) (UP(node_)->location.end) +#define PM_NODE_LENGTH(node_) (UP(node_)->location.length) +#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length) +#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_)) + +#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_)) +#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) + +#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_)) +#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_)) +#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - 
PM_NODE_START(left_)) +#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) +#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_)) -#define PM_LOCATION_NULL_VALUE(parser_) ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start }) -#define PM_LOCATION_TOKEN_VALUE(token_) ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) }) -#define PM_LOCATION_NODE_VALUE(node_) ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) }) -#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token)) +#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) }) +#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0) +#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_)) +#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location + +#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_)) +#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_)) + +#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_) +#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_)) +#define NTOK2PTR(token_) ((token_).start == NULL ? 
NULL : &(token_)) /******************************************************************************/ /* Lex mode manipulations */ @@ -422,15 +449,18 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call * Append an error to the list of errors on the parser. */ static inline void -pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->error_list, start, end, diag_id); +pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->error_list, start, length, diag_id); } /** - * Append an error to the list of errors on the parser using a format string. + * Append an error to the list of errors on the parser using the location of the + * given token. */ -#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__) +static inline void +pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { + pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); +} /** * Append an error to the list of errors on the parser using the location of the @@ -438,15 +468,17 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_ */ static inline void pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->current.start, parser->current.end, diag_id); + pm_parser_err_token(parser, &parser->current, diag_id); } /** - * Append an error to the list of errors on the parser using the given location - * using a format string. + * Append an error to the list of errors on the parser using the location of the + * previous token. */ -#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) 
\ - PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__) +static inline void +pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { + pm_parser_err_token(parser, &parser->previous, diag_id); +} /** * Append an error to the list of errors on the parser using the location of the @@ -454,61 +486,49 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { */ static inline void pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, node->location.start, node->location.end, diag_id); + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string. - */ -#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) - -/** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string, and add on the content of the node. + * Append an error to the list of errors on the parser using a format string. */ -#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \ - PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start) +#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * previous token. + * given node and a format string. 
*/ -static inline void -pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * given token. + * given node and a format string, and add on the content of the node. */ -static inline void -pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, token->start, token->end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \ + PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) (parser_->start + PM_NODE_START(node_))) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string. */ -#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string, and add on the content of the token. */ -#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser. 
*/ static inline void -pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id); +pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->warning_list, start, length, diag_id); } /** @@ -517,7 +537,7 @@ pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm */ static inline void pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, token->start, token->end, diag_id); + pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); } /** @@ -526,35 +546,36 @@ pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic */ static inline void pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, node->location.start, node->location.end, diag_id); + pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append a warning to the list of warnings on the parser using a format string. + * Append a warning to the list of warnings on the parser using a format string + * and the given location. */ -#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string. */ -#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) 
\ - PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string, and add on the content of the token. */ -#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser using the location of * the given node and a format string. */ -#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Add an error for an expected heredoc terminator. This is a special function @@ -565,8 +586,8 @@ static void pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) { PM_PARSER_ERR_FORMAT( parser, - ident_start, - ident_start + ident_length, + U32(ident_start - parser->start), + U32(ident_length), PM_ERR_HEREDOC_TERM, (int) ident_length, (const char *) ident_start @@ -828,7 +849,7 @@ pm_locals_resize(pm_locals_t *locals) { * @return True if the local was added, and false if the local already exists. 
*/ static bool -pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) { +pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) { if (locals->size >= (locals->capacity / 4 * 3)) { pm_locals_resize(locals); } @@ -840,7 +861,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = 0 @@ -861,7 +882,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = initial_hash @@ -986,7 +1007,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, PM_PARSER_WARN_FORMAT( parser, local->location.start, - local->location.end, + local->location.length, PM_WARN_UNUSED_LOCAL_VARIABLE, (int) constant->length, (const char *) constant->start @@ -1005,7 +1026,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, * Retrieve the constant pool id for the given location. 
*/ static inline pm_constant_id_t -pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { +pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); } @@ -1030,16 +1051,7 @@ pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t le */ static inline pm_constant_id_t pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return pm_parser_constant_id_location(parser, token->start, token->end); -} - -/** - * Retrieve the constant pool id for the given token. If the token is not - * provided, then return 0. - */ -static inline pm_constant_id_t -pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token); + return pm_parser_constant_id_raw(parser, token->start, token->end); } /** @@ -1211,7 +1223,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) { break; case PM_CALL_NODE: { const pm_call_node_t *cast = (const pm_call_node_t *) node; - if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break; + if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break; const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name); switch (message->length) { @@ -1564,19 +1576,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr } } -/** - * In a lot of places in the tree you can have tokens that are not provided but - * that do not cause an error. For example, this happens in a method call - * without parentheses. In these cases we set the token to the "not provided" type. 
- * For example: - * - * pm_token_t token = not_provided(parser); - */ -static inline pm_token_t -not_provided(pm_parser_t *parser) { - return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }; -} - /** * This is a special out parameter to the parse_arguments_list function that * includes opening and closing parentheses in addition to the arguments since @@ -1603,22 +1602,29 @@ typedef struct { /** * Retrieve the end location of a `pm_arguments_t` object. */ -static inline const uint8_t * +static inline const pm_location_t * pm_arguments_end(pm_arguments_t *arguments) { if (arguments->block != NULL) { - const uint8_t *end = arguments->block->location.end; - if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) { - end = arguments->closing_loc.end; + uint32_t end = PM_NODE_END(arguments->block); + + if (arguments->closing_loc.length > 0) { + uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc); + if (arguments_end > end) { + return &arguments->closing_loc; + } } - return end; + return &arguments->block->location; } - if (arguments->closing_loc.start != NULL) { - return arguments->closing_loc.end; + if (arguments->closing_loc.length > 0) { + return &arguments->closing_loc; } if (arguments->arguments != NULL) { - return arguments->arguments->base.location.end; + return &arguments->arguments->base.location; + } + if (arguments->opening_loc.length > 0) { + return &arguments->opening_loc; } - return arguments->closing_loc.end; + return NULL; } /** @@ -1629,7 +1635,7 @@ static void pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) { // First, check that we have arguments and that we don't have a closing // location for them. 
- if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) { + if (arguments->arguments == NULL || arguments->closing_loc.length > 0) { return; } @@ -1906,7 +1912,7 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin size_t unknown_flags_length = pm_buffer_length(&unknown_flags); if (unknown_flags_length != 0) { const char *word = unknown_flags_length >= 2 ? "options" : "option"; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); } pm_buffer_free(&unknown_flags); } @@ -1940,32 +1946,22 @@ pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) { } #define PM_NODE_ALLOC(parser_, type_) (type_ *) pm_node_alloc(parser_, sizeof(type_)) -#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \ +#define PM_NODE_INIT(parser_, type_, flags_, location_) (pm_node_t) { \ .type = (type_), \ .flags = (flags_), \ .node_id = ++(parser_)->node_id, \ - .location = { .start = (start_), .end = (end_) } \ + .location = location_ \ } -#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL) -#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start) -#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_)) -#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_)) - -#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_)) -#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) 
PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_)) -#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_)) -#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_)) - /** * Allocate a new MissingNode node. */ static pm_missing_node_t * -pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { +pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) { pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t); *node = (pm_missing_node_t) { - .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end) + .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, ((pm_location_t) { .start = start, .length = length })) }; return node; @@ -1980,10 +1976,10 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t); *node = (pm_alias_global_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name), + .base = PM_NODE_INIT(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)), .new_name = new_name, .old_name = old_name, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -1998,10 +1994,10 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t); *node = (pm_alias_method_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name), + .base = PM_NODE_INIT(parser, PM_ALIAS_METHOD_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name)), .new_name = new_name, .old_name = old_name, - 
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -2015,10 +2011,10 @@ pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t); *node = (pm_alternation_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_ALTERNATION_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2034,9 +2030,9 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t); *node = (pm_and_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_AND_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .right = right }; @@ -2051,7 +2047,7 @@ pm_arguments_node_create(pm_parser_t *parser) { pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t); *node = (pm_arguments_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_UNSET), .arguments = { 0 } }; @@ -2072,11 +2068,11 @@ pm_arguments_node_size(pm_arguments_node_t *node) { static void pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) { if (pm_arguments_node_size(node) == 0) { - node->base.location.start = argument->location.start; + PM_NODE_START_SET_NODE(node, argument); } - if (node->base.location.end < argument->location.end) { - node->base.location.end = argument->location.end; + if (PM_NODE_END(node) < PM_NODE_END(argument)) { + PM_NODE_LENGTH_SET_NODE(node, argument); } 
pm_node_list_append(&node->arguments, argument); @@ -2097,12 +2093,21 @@ static pm_array_node_t * pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t); - *node = (pm_array_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .elements = { 0 } - }; + if (opening == NULL) { + *node = (pm_array_node_t) { + .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET), + .opening_loc = { 0 }, + .closing_loc = { 0 }, + .elements = { 0 } + }; + } else { + *node = (pm_array_node_t) { + .base = PM_NODE_INIT(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, opening), + .elements = { 0 } + }; + } return node; } @@ -2112,12 +2117,12 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { */ static inline void pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { - if (!node->elements.size && !node->opening_loc.start) { - node->base.location.start = element->location.start; + if (!node->elements.size && !node->opening_loc.length) { + PM_NODE_START_SET_NODE(node, element); } pm_node_list_append(&node->elements, element); - node->base.location.end = element->location.end; + PM_NODE_LENGTH_SET_NODE(node, element); // If the element is not a static literal, then the array is not a static // literal. Turn that flag off. @@ -2134,10 +2139,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { * Set the closing token and end location of an array node. 
*/ static void -pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED); - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2149,7 +2154,7 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1])), .constant = NULL, .rest = NULL, .requireds = { 0 }, @@ -2185,7 +2190,7 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) { pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_NODE(rest)), .constant = NULL, .rest = rest, .requireds = { 0 }, @@ -2206,11 +2211,11 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, 
PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing)), .constant = constant, .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .requireds = { 0 }, .posts = { 0 } }; @@ -2227,11 +2232,11 @@ pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *openin pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); *node = (pm_array_pattern_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_ARRAY_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .constant = NULL, .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .requireds = { 0 }, .posts = { 0 } }; @@ -2250,14 +2255,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t static pm_assoc_node_t * pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) { pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t); - const uint8_t *end; + uint32_t end; - if (value != NULL && value->location.end > key->location.end) { - end = value->location.end; - } else if (operator->type != PM_TOKEN_NOT_PROVIDED) { - end = operator->end; + if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) { + end = PM_NODE_END(value); + } else if (operator != NULL) { + end = PM_TOKEN_END(parser, operator); } else { - end = key->location.end; + end = PM_NODE_END(key); } // Hash string keys will be frozen, so we can mark them as frozen here so @@ -2278,9 +2283,9 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper } *node = (pm_assoc_node_t) { - .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, 
key->location.start, end), + .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) })), .key = key, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = NTOK2LOC(parser, operator), .value = value }; @@ -2296,13 +2301,9 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t); *node = (pm_assoc_splat_node_t) { - .base = ( - (value == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value) - ), + .base = PM_NODE_INIT(parser, PM_ASSOC_SPLAT_NODE, 0, (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value)), .value = value, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2317,7 +2318,7 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t); *node = (pm_back_reference_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_BACK_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -2331,13 +2332,12 @@ static pm_begin_node_t * pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) { pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t); + uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword); + uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements); + *node = (pm_begin_node_t) { - .base = ( - (statements == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements) - ), - .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword), + .base = PM_NODE_INIT(parser, PM_BEGIN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .begin_keyword_loc = NTOK2LOC(parser, begin_keyword), .statements = statements, .end_keyword_loc = { 0 } }; @@ -2350,11 +2350,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st */ static void pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) { - // If the begin keyword doesn't exist, we set the start on the begin_node - if (!node->begin_keyword_loc.start) { - node->base.location.start = rescue_clause->base.location.start; + if (node->begin_keyword_loc.length == 0) { + PM_NODE_START_SET_NODE(node, rescue_clause); } - node->base.location.end = rescue_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, rescue_clause); node->rescue_clause = rescue_clause; } @@ -2363,7 +2362,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_ */ static void pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) { - node->base.location.end = else_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, else_clause); + } + PM_NODE_LENGTH_SET_NODE(node, else_clause); node->else_clause = else_clause; } @@ -2372,7 +2374,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause */ static void pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) { - node->base.location.end = ensure_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, ensure_clause); + } + PM_NODE_LENGTH_SET_NODE(node, ensure_clause); 
node->ensure_clause = ensure_clause; } @@ -2380,11 +2385,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_ * Set the end keyword and end location of a begin node. */ static void -pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) { - assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING); - - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword); +pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) { + assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -2392,16 +2396,13 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo */ static pm_block_argument_node_t * pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) { + assert(operator->type == PM_TOKEN_UAMPERSAND); pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t); *node = (pm_block_argument_node_t) { - .base = ( - (expression == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression) - ), + .base = PM_NODE_INIT(parser, PM_BLOCK_ARGUMENT_NODE, 0, (expression == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)), .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2415,12 +2416,12 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t); *node = (pm_block_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_BLOCK_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .locals = *locals, .parameters = parameters, .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -2431,18 +2432,14 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p */ static pm_block_parameter_node_t * pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) { - assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); + assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t); *node = (pm_block_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETER_NODE, 0, (name == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -2455,28 +2452,28 @@ static pm_block_parameters_node_t * pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) { pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t); - const uint8_t *start; - if (opening->type != PM_TOKEN_NOT_PROVIDED) { - start = opening->start; + uint32_t start; + if (opening != NULL) { + start = PM_TOKEN_START(parser, opening); } else if (parameters != NULL) { - start = parameters->base.location.start; + start = PM_NODE_START(parameters); } else { - start = NULL; + start = 0; } - const uint8_t *end; + uint32_t end; if (parameters != NULL) { - end = parameters->base.location.end; - } else if (opening->type != PM_TOKEN_NOT_PROVIDED) { - end = opening->end; + end = PM_NODE_END(parameters); + } else if (opening != NULL) { + end = PM_TOKEN_END(parser, opening); } else { - end = NULL; + end = 0; } *node = (pm_block_parameters_node_t) { - .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end), + .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .parameters = parameters, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .opening_loc = NTOK2LOC(parser, opening), .closing_loc = { 0 }, .locals = { 0 } }; @@ -2488,11 +2485,10 @@ pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *param * Set the closing location of a BlockParametersNode node. 
*/ static void -pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING); - - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2503,7 +2499,7 @@ pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t); *node = (pm_block_local_variable_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -2517,8 +2513,11 @@ static void pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) { pm_node_list_append(&node->locals, UP(local)); - if (node->base.location.start == NULL) node->base.location.start = local->base.location.start; - node->base.location.end = local->base.location.end; + if (PM_NODE_LENGTH(node) == 0) { + PM_NODE_START_SET_NODE(node, local); + } + + PM_NODE_LENGTH_SET_NODE(node, local); } /** @@ -2530,13 +2529,9 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t); *node = (pm_break_node_t) { - .base = ( - (arguments == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments) - ), + .base = PM_NODE_INIT(parser, PM_BREAK_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), .arguments = arguments, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -2552,16 +2547,16 @@ static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAG static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3); /** - * Allocate and initialize a new CallNode node. This sets everything to NULL or - * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden - * in the various specializations of this function. + * Allocate and initialize a new CallNode node. This sets everything to NULL + * such that its values can be overridden in the various specializations of this + * function. 
*/ static pm_call_node_t * pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) { pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t); *node = (pm_call_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags), + .base = PM_NODE_INIT(parser, PM_CALL_NODE, flags, PM_LOCATION_INIT_UNSET), .receiver = NULL, .call_operator_loc = { 0 }, .message_loc = { 0 }, @@ -2600,12 +2595,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_ pm_call_node_t *node = pm_call_node_create(parser, flags); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; node->message_loc.start = arguments->opening_loc.start; - node->message_loc.end = arguments->closing_loc.end; + node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start; node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; @@ -2626,11 +2624,11 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags); - node->base.location.start = MIN(receiver->location.start, argument->location.start); - node->base.location.end = MAX(receiver->location.end, argument->location.end); + PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument); + PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? 
receiver : argument); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); pm_arguments_node_t *arguments = pm_arguments_node_create(parser); pm_arguments_node_arguments_append(arguments, argument); @@ -2651,16 +2649,17 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - const uint8_t *end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); if (end == NULL) { - end = message->end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, message); + } else { + PM_NODE_LENGTH_SET_LOCATION(node, end); } - node->base.location.end = end; node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->call_operator_loc = TOK2LOC(parser, operator); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2674,7 +2673,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o * If the final character is `@` as is the case for `foo.~@`, * we should ignore the @ in the same way we do for symbols. 
*/ - node->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message)); + node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message)); return node; } @@ -2684,12 +2683,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o static pm_call_node_t * pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, 0); - node->base.location.start = parser->start; - node->base.location.end = parser->end; + node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) }; node->receiver = receiver; - node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL }; - node->message_loc = (pm_location_t) { .start = NULL, .end = NULL }; node->arguments = arguments; node->name = pm_parser_constant_id_constant(parser, message, strlen(message)); @@ -2704,10 +2700,12 @@ static pm_call_node_t * pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location.start = message->start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_TOKEN(parser, node, message); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2725,7 +2723,7 @@ static pm_call_node_t * pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) { pm_call_node_t *node = pm_call_node_create(parser, 
PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_NULL_VALUE(parser); + node->base.location = (pm_location_t) { 0 }; node->arguments = arguments; node->name = name; @@ -2742,16 +2740,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = message->start; - if (arguments->closing_loc.start != NULL) { - node->base.location.end = arguments->closing_loc.end; + PM_NODE_START_SET_TOKEN(parser, node, message); + if (arguments->closing_loc.length > 0) { + PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc); } else { assert(receiver != NULL); - node->base.location.end = receiver->location.end; + PM_NODE_LENGTH_SET_NODE(node, receiver); } node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2769,11 +2767,13 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->call_operator_loc = TOK2LOC(parser, operator); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2796,11 +2796,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, 
pm_node_t * pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = operator->start; - node->base.location.end = receiver->location.end; + PM_NODE_START_SET_TOKEN(parser, node, operator); + PM_NODE_LENGTH_SET_NODE(node, receiver); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); node->name = pm_parser_constant_id_constant(parser, name, strlen(name)); return node; @@ -2814,8 +2814,8 @@ static pm_call_node_t * pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_TOKEN_VALUE(message); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->base.location = TOK2LOC(parser, message); + node->message_loc = TOK2LOC(parser, message); node->name = pm_parser_constant_id_token(parser, message); return node; @@ -2828,11 +2828,11 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { static inline bool pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) { return ( - (node->message_loc.start != NULL) && - (node->message_loc.end[-1] != '!') && - (node->message_loc.end[-1] != '?') && - char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) && - (node->opening_loc.start == NULL) && + (node->message_loc.length > 0) && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') && + char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) && + (node->opening_loc.length == 0) && (node->arguments == NULL) && (node->block == NULL) ); @@ -2868,13 +2868,13 @@ pm_call_and_write_node_create(pm_parser_t *parser, 
pm_call_node_t *target, const pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t); *node = (pm_call_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2923,14 +2923,14 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_AND_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2951,14 +2951,14 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t); *node = (pm_call_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - 
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -2983,15 +2983,15 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3013,13 +3013,13 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t); *node = (pm_call_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_CALL_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .message_loc = target->message_loc, .read_name = 0, .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ 
-3045,14 +3045,14 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value), + .base = PM_NODE_INIT(parser, PM_INDEX_OR_WRITE_NODE, FL(target), PM_LOCATION_INIT_NODES(target, value)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .opening_loc = target->opening_loc, .arguments = target->arguments, .closing_loc = target->closing_loc, .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3073,7 +3073,7 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t); *node = (pm_call_target_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target), + .base = PM_NODE_INIT(parser, PM_CALL_TARGET_NODE, FL(target), PM_LOCATION_INIT_NODE(target)), .receiver = target->receiver, .call_operator_loc = target->call_operator_loc, .name = target->name, @@ -3084,11 +3084,8 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { * where the call operator was not present. In that case we will have a * problem because it is a required location. In this case we need to fill * it in with a fake location so that the syntax tree remains valid. */ - if (node->call_operator_loc.start == NULL) { - node->call_operator_loc = (pm_location_t) { - .start = target->base.location.start, - .end = target->base.location.start - }; + if (node->call_operator_loc.length == 0) { + node->call_operator_loc = target->base.location; } // Here we're going to free the target, since it is no longer necessary. 
@@ -3111,7 +3108,7 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); *node = (pm_index_target_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target), + .base = PM_NODE_INIT(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, PM_LOCATION_INIT_NODE(target)), .receiver = target->receiver, .opening_loc = target->opening_loc, .arguments = target->arguments, @@ -3135,10 +3132,10 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t); *node = (pm_capture_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target), + .base = PM_NODE_INIT(parser, PM_CAPTURE_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(value, target)), .value = value, .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -3152,11 +3149,11 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t); *node = (pm_case_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CASE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? 
case_keyword : end_keyword)), .predicate = predicate, .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .case_keyword_loc = TOK2LOC(parser, case_keyword), + .end_keyword_loc = NTOK2LOC(parser, end_keyword), .conditions = { 0 } }; @@ -3171,7 +3168,7 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE)); pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3180,31 +3177,31 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { static void pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseNode node. */ static void -pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** * Allocate and initialize a new CaseMatchNode node. 
*/ static pm_case_match_node_t * -pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) { +pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) { pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t); *node = (pm_case_match_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CASE_MATCH_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, case_keyword)), .predicate = predicate, .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .case_keyword_loc = TOK2LOC(parser, case_keyword), + .end_keyword_loc = { 0 }, .conditions = { 0 } }; @@ -3219,7 +3216,7 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi assert(PM_NODE_TYPE_P(condition, PM_IN_NODE)); pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3228,16 +3225,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi static void pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseMatchNode node. 
*/ static void -pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -3248,14 +3245,14 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t); *node = (pm_class_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)), .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), + .class_keyword_loc = TOK2LOC(parser, class_keyword), .constant_path = constant_path, - .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator), + .inheritance_operator_loc = NTOK2LOC(parser, inheritance_operator), .superclass = superclass, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword), .name = pm_parser_constant_id_token(parser, name) }; @@ -3271,10 +3268,10 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t); *node = (pm_class_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), 
.value = value }; @@ -3289,12 +3286,12 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t); *node = (pm_class_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3309,10 +3306,10 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t); *node = (pm_class_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3328,7 +3325,7 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t); *node = (pm_class_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -3343,7 +3340,7 @@ 
pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) */ static inline pm_node_flags_t pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) { - if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) { + if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) { return flags; } return 0; @@ -3358,10 +3355,10 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_class_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value), + .base = PM_NODE_INIT(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)), .name = read_node->name, - .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .name_loc = read_node->base.location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3377,9 +3374,9 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t); *node = (pm_constant_path_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3394,11 +3391,11 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t); *node = (pm_constant_path_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, 
PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3413,9 +3410,9 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t); *node = (pm_constant_path_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3435,23 +3432,13 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to name = pm_parser_constant_id_token(parser, name_token); } - if (parent == NULL) { - *node = (pm_constant_path_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token), - .parent = parent, - .name = name, - .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter), - .name_loc = PM_LOCATION_TOKEN_VALUE(name_token) - }; - } else { - *node = (pm_constant_path_node_t) { - .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token), - .parent = parent, - .name = name, - .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter), - .name_loc = PM_LOCATION_TOKEN_VALUE(name_token) - }; - } + *node = (pm_constant_path_node_t) { + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_NODE, 0, (parent == NULL) ? 
PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token)), + .parent = parent, + .name = name, + .delimiter_loc = TOK2LOC(parser, delimiter), + .name_loc = TOK2LOC(parser, name_token) + }; return node; } @@ -3465,9 +3452,9 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_constant_path_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_PATH_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .target = target, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3483,10 +3470,10 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t * pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t); *node = (pm_constant_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3501,12 +3488,12 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t); *node = (pm_constant_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), 
+ .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -3521,10 +3508,10 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t); *node = (pm_constant_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3536,11 +3523,11 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t */ static pm_constant_read_node_t * pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) { - assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING); + assert(name->type == PM_TOKEN_CONSTANT || name->type == 0); pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t); *node = (pm_constant_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_CONSTANT_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -3556,10 +3543,10 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_constant_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_CONSTANT_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, 
.name_loc = target->base.location, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -3636,23 +3623,19 @@ pm_def_node_create( } *node = (pm_def_node_t) { - .base = ( - (end_keyword->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body) - : PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword) - ), + .base = PM_NODE_INIT(parser, PM_DEF_NODE, 0, (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword)), .name = name, - .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc), + .name_loc = TOK2LOC(parser, name_loc), .receiver = receiver, .parameters = parameters, .body = body, .locals = *locals, - .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal), - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .def_keyword_loc = TOK2LOC(parser, def_keyword), + .operator_loc = NTOK2LOC(parser, operator), + .lparen_loc = NTOK2LOC(parser, lparen), + .rparen_loc = NTOK2LOC(parser, rparen), + .equal_loc = NTOK2LOC(parser, equal), + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -3666,15 +3649,11 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t); *node = (pm_defined_node_t) { - .base = ( - (rparen->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value) - : PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen) - ), - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), + .base = PM_NODE_INIT(parser, PM_DEFINED_NODE, 0, (rparen == NULL) ? 
PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen)), + .lparen_loc = NTOK2LOC(parser, lparen), .value = value, - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .rparen_loc = NTOK2LOC(parser, rparen), + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -3688,14 +3667,10 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t); *node = (pm_else_node_t) { - .base = ( - ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) - ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements) - : PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword) - ), - .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword), + .base = PM_NODE_INIT(parser, PM_ELSE_NODE, 0, ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword)), + .else_keyword_loc = TOK2LOC(parser, else_keyword), .statements = statements, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -3709,10 +3684,10 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t); *node = (pm_embedded_statements_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), .statements = statements, - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -3726,8 +3701,8 @@ 
pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t); *node = (pm_embedded_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .base = PM_NODE_INIT(parser, PM_EMBEDDED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)), + .operator_loc = TOK2LOC(parser, operator), .variable = variable }; @@ -3742,10 +3717,10 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_ pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t); *node = (pm_ensure_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword), - .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword), + .base = PM_NODE_INIT(parser, PM_ENSURE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword)), + .ensure_keyword_loc = TOK2LOC(parser, ensure_keyword), .statements = statements, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -3760,7 +3735,7 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t); *node = (pm_false_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -3781,7 +3756,7 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { pm_node_t *right; if (nodes->size == 1) { - right = UP(pm_missing_node_create(parser, left->location.end, left->location.end)); + right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0)); } else { right = nodes->nodes[nodes->size - 1]; assert(PM_NODE_TYPE_P(right, 
PM_SPLAT_NODE)); @@ -3795,7 +3770,7 @@ pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { pm_node_t *right_splat_node = right; #endif *node = (pm_find_pattern_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_FIND_PATTERN_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .constant = NULL, .left = left_splat_node, .right = right_splat_node, @@ -3859,7 +3834,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { // This should never happen, because we've already checked that the token // is in a valid format. However it's good to be safe. if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE); xfree((void *) buffer); return 0.0; } @@ -3878,7 +3853,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { ellipsis = ""; } - pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); + pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); value = (value < 0.0) ? 
-HUGE_VAL : HUGE_VAL; } @@ -3896,7 +3871,7 @@ pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t); *node = (pm_float_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .value = pm_double_parse(parser, token) }; @@ -3912,7 +3887,7 @@ pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) { pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_float_node_create(parser, &((pm_token_t) { .type = PM_TOKEN_FLOAT, .start = token->start, @@ -3932,7 +3907,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) { pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); *node = (pm_rational_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numerator = { 0 }, .denominator = { 0 } }; @@ -3985,7 +3960,7 @@ pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *t pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) { .type = 
PM_TOKEN_FLOAT_RATIONAL, .start = token->start, @@ -4013,14 +3988,14 @@ pm_for_node_create( pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t); *node = (pm_for_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_FOR_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword)), .index = index, .collection = collection, .statements = statements, - .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword), - .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .for_keyword_loc = TOK2LOC(parser, for_keyword), + .in_keyword_loc = TOK2LOC(parser, in_keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -4035,7 +4010,7 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t); *node = (pm_forwarding_arguments_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4050,7 +4025,7 @@ pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t); *node = (pm_forwarding_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_FORWARDING_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4071,11 +4046,7 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm } *node = (pm_forwarding_super_node_t) { - .base = ( - (block == NULL) - ? 
PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block) - ), + .base = PM_NODE_INIT(parser, PM_FORWARDING_SUPER_NODE, 0, (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block)), .block = block }; @@ -4091,10 +4062,10 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); *node = (pm_hash_pattern_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .constant = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .elements = { 0 }, .rest = NULL }; @@ -4109,25 +4080,25 @@ static pm_hash_pattern_node_t * pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) { pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); - const uint8_t *start; - const uint8_t *end; + uint32_t start; + uint32_t end; if (elements->size > 0) { if (rest) { - start = MIN(rest->location.start, elements->nodes[0]->location.start); - end = MAX(rest->location.end, elements->nodes[elements->size - 1]->location.end); + start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0])); + end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1])); } else { - start = elements->nodes[0]->location.start; - end = elements->nodes[elements->size - 1]->location.end; + start = PM_NODE_START(elements->nodes[0]); + end = PM_NODE_END(elements->nodes[elements->size - 1]); } } else { assert(rest != NULL); - start = rest->location.start; - end = rest->location.end; + start = PM_NODE_START(rest); + end = 
PM_NODE_END(rest); } *node = (pm_hash_pattern_node_t) { - .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end), + .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .constant = NULL, .elements = { 0 }, .rest = rest, @@ -4152,7 +4123,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) { case PM_NUMBERED_REFERENCE_READ_NODE: // This will only ever happen in the event of a syntax error, but we // still need to provide something for the node. - return pm_parser_constant_id_location(parser, target->location.start, target->location.end); + return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); default: assert(false && "unreachable"); return (pm_constant_id_t) -1; @@ -4168,10 +4139,10 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t); *node = (pm_global_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4186,12 +4157,12 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t); *node = (pm_global_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = 
pm_global_variable_write_name(parser, target), .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -4206,10 +4177,10 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t); *node = (pm_global_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4224,7 +4195,7 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); *node = (pm_global_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = pm_parser_constant_id_token(parser, name) }; @@ -4239,7 +4210,7 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); *node = (pm_global_variable_read_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_UNSET), .name = name }; @@ -4255,10 +4226,10 @@ 
pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_global_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .name = pm_global_variable_write_name(parser, target), - .name_loc = PM_LOCATION_NODE_VALUE(target), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .name_loc = target->location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4273,10 +4244,10 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t); *node = (pm_global_variable_write_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0), + .base = PM_NODE_INIT(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0, PM_LOCATION_INIT_UNSET), .name = name, - .name_loc = PM_LOCATION_NULL_VALUE(parser), - .operator_loc = PM_LOCATION_NULL_VALUE(parser), + .name_loc = { 0 }, + .operator_loc = { 0 }, .value = value }; @@ -4292,9 +4263,9 @@ pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) { pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t); *node = (pm_hash_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = { 0 }, .elements = { 0 } }; @@ -4322,9 +4293,9 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) { } static inline void -pm_hash_node_closing_loc_set(pm_hash_node_t 
*hash, pm_token_t *token) { - hash->base.location.end = token->end; - hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token); +pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) { + PM_NODE_LENGTH_SET_TOKEN(parser, hash, token); + hash->closing_loc = TOK2LOC(parser, token); } /** @@ -4342,25 +4313,27 @@ pm_if_node_create(pm_parser_t *parser, pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); - const uint8_t *end; - if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = end_keyword->end; + uint32_t start = PM_TOKEN_START(parser, if_keyword); + uint32_t end; + + if (end_keyword != NULL) { + end = PM_TOKEN_END(parser, end_keyword); } else if (subsequent != NULL) { - end = subsequent->location.end; + end = PM_NODE_END(subsequent); } else if (pm_statements_node_body_length(statements) != 0) { - end = statements->base.location.end; + end = PM_NODE_END(statements); } else { - end = predicate->location.end; + end = PM_NODE_END(predicate); } *node = (pm_if_node_t) { - .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end), - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .if_keyword_loc = TOK2LOC(parser, if_keyword), .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), + .then_keyword_loc = NTOK2LOC(parser, then_keyword), .statements = statements, .subsequent = subsequent, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = NTOK2LOC(parser, end_keyword) }; return node; @@ -4378,8 +4351,8 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t pm_statements_node_body_append(parser, statements, statement, true); *node = (pm_if_node_t) { - .base = 
PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate), - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)), + .if_keyword_loc = TOK2LOC(parser, if_keyword), .predicate = predicate, .then_keyword_loc = { 0 }, .statements = statements, @@ -4404,16 +4377,14 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to pm_statements_node_t *else_statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, else_statements, false_expression, true); - pm_token_t end_keyword = not_provided(parser); - pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword); - + pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL); pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); *node = (pm_if_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression), + .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(predicate, false_expression)), .if_keyword_loc = { 0 }, .predicate = predicate, - .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark), + .then_keyword_loc = TOK2LOC(parser, qmark), .statements = if_statements, .subsequent = UP(else_node), .end_keyword_loc = { 0 } @@ -4424,15 +4395,15 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to } static inline void -pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } static inline void 
-pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } /** @@ -4443,7 +4414,7 @@ pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) { pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t); *node = (pm_implicit_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value), + .base = PM_NODE_INIT(parser, PM_IMPLICIT_NODE, 0, PM_LOCATION_INIT_NODE(value)), .value = value }; @@ -4460,7 +4431,7 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t); *node = (pm_implicit_rest_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_IMPLICIT_REST_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -4475,7 +4446,7 @@ pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t); *node = (pm_integer_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .value = { 0 } }; @@ -4502,7 +4473,7 @@ pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, cons pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, 
PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER, .start = token->start, @@ -4523,7 +4494,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); *node = (pm_rational_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numerator = { 0 }, .denominator = { .value = 1, 0 } }; @@ -4552,7 +4523,7 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); *node = (pm_imaginary_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token), + .base = PM_NODE_INIT(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)), .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER_RATIONAL, .start = token->start, @@ -4570,21 +4541,23 @@ static pm_in_node_t * pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) { pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t); - const uint8_t *end; + uint32_t start = PM_TOKEN_START(parser, in_keyword); + uint32_t end; + if (statements != NULL) { - end = statements->base.location.end; - } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = then_keyword->end; + end = PM_NODE_END(statements); + } else if (then_keyword != NULL) { + end = PM_TOKEN_END(parser, then_keyword); } else { - end = pattern->location.end; + end = PM_NODE_END(pattern); } *node = (pm_in_node_t) { - .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end), + .base = 
PM_NODE_INIT(parser, PM_IN_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), .pattern = pattern, .statements = statements, - .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword) + .in_loc = TOK2LOC(parser, in_keyword), + .then_loc = NTOK2LOC(parser, then_keyword) }; return node; @@ -4599,10 +4572,10 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t); *node = (pm_instance_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4617,12 +4590,12 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t); *node = (pm_instance_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) }; return node; @@ -4637,10 +4610,10 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia 
pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t); *node = (pm_instance_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name = target->name, .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4656,7 +4629,7 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t); *node = (pm_instance_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -4673,10 +4646,10 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_instance_variable_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value), + .base = PM_NODE_INIT(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(read_node, value)), .name = read_node->name, - .name_loc = PM_LOCATION_NODE_VALUE(read_node), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .name_loc = read_node->base.location, + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -4735,9 +4708,9 @@ pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_tok pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t); *node = (pm_interpolated_regular_expression_node_t) { - .base = 
PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, opening)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, opening), .parts = { 0 } }; @@ -4746,11 +4719,11 @@ pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_tok static inline void pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) { - if (node->base.location.start > part->location.start) { - node->base.location.start = part->location.start; + if (PM_NODE_START(node) > PM_NODE_START(part)) { + PM_NODE_START_SET_NODE(node, part); } - if (node->base.location.end < part->location.end) { - node->base.location.end = part->location.end; + if (PM_NODE_END(node) < PM_NODE_END(part)) { + PM_NODE_LENGTH_SET_NODE(node, part); } pm_interpolated_node_append(UP(node), &node->parts, part); @@ -4758,8 +4731,8 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio static inline void pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing)); } @@ -4794,11 +4767,13 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ #define MUTABLE_FLAGS(node) \ node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN); - if (node->parts.size == 0 
&& node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } - node->base.location.end = MAX(node->base.location.end, part->location.end); + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: @@ -4893,10 +4868,13 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin break; } + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing); + *node = (pm_interpolated_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .closing_loc = NTOK2LOC(parser, closing), .parts = { 0 } }; @@ -4914,25 +4892,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin * Set the closing token of the given InterpolatedStringNode node. 
*/ static void -pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } static void pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) { - if (node->parts.size == 0 && node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } pm_interpolated_node_append(UP(node), &node->parts, part); - node->base.location.end = MAX(node->base.location.end, part->location.end); + + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } } static void -pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -4942,10 +4923,13 @@ static pm_interpolated_symbol_node_t * pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) { pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t); + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 
0 : PM_TOKEN_END(parser, closing); + *node = (pm_interpolated_symbol_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .closing_loc = NTOK2LOC(parser, closing), .parts = { 0 } }; @@ -4967,9 +4951,9 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t); *node = (pm_interpolated_x_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_INTERPOLATED_X_STRING_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .parts = { 0 } }; @@ -4979,13 +4963,13 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi static inline void pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) { pm_interpolated_node_append(UP(node), &node->parts, part); - node->base.location.end = part->location.end; + PM_NODE_LENGTH_SET_NODE(node, part); } static inline void -pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { + 
node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -4996,7 +4980,7 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t); *node = (pm_it_local_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), }; return node; @@ -5010,7 +4994,7 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t); *node = (pm_it_parameters_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing), + .base = PM_NODE_INIT(parser, PM_IT_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), }; return node; @@ -5024,7 +5008,7 @@ pm_keyword_hash_node_create(pm_parser_t *parser) { pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t); *node = (pm_keyword_hash_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS), + .base = PM_NODE_INIT(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS, PM_LOCATION_INIT_UNSET), .elements = { 0 } }; @@ -5043,10 +5027,10 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el } pm_node_list_append(&hash->elements, element); - if (hash->base.location.start == NULL) { - hash->base.location.start = element->location.start; + if (PM_NODE_LENGTH(hash) == 0) { + PM_NODE_START_SET_NODE(hash, element); } - hash->base.location.end = element->location.end; + PM_NODE_LENGTH_SET_NODE(hash, element); } /** @@ -5057,9 +5041,9 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, 
pm_required_keyword_parameter_node_t); *node = (pm_required_keyword_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name), - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), + .base = PM_NODE_INIT(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), + .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1), + .name_loc = TOK2LOC(parser, name), }; return node; @@ -5073,9 +5057,9 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t); *node = (pm_optional_keyword_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value), - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), + .base = PM_NODE_INIT(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)), + .name = pm_parser_constant_id_raw(parser, name->start, name->end - 1), + .name_loc = TOK2LOC(parser, name), .value = value }; @@ -5090,14 +5074,10 @@ pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *ope pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t); *node = (pm_keyword_rest_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, (name == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5119,11 +5099,11 @@ pm_lambda_node_create( pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t); *node = (pm_lambda_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing), + .base = PM_NODE_INIT(parser, PM_LAMBDA_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, closing)), .locals = *locals, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .operator_loc = TOK2LOC(parser, operator), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing), .parameters = parameters, .body = body }; @@ -5141,9 +5121,9 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t); *node = (pm_local_variable_and_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, .depth = depth @@ -5160,12 +5140,12 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t); *node = (pm_local_variable_operator_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, 
PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .binary_operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), + .binary_operator = pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), .depth = depth }; @@ -5182,9 +5162,9 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t); *node = (pm_local_variable_or_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, PM_LOCATION_INIT_NODES(target, value)), .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value, .name = name, .depth = depth @@ -5203,7 +5183,7 @@ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_tok pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t); *node = (pm_local_variable_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .name = name_id, .depth = depth }; @@ -5239,12 +5219,12 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_local_variable_write_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start })), .name 
= name, .depth = depth, .value = value, .name_loc = *name_loc, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5263,8 +5243,13 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) { * are of the form /^_\d$/). */ static inline bool -pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { - return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1])); +pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) { + return ( + (length == 2) && + (parser->start[start] == '_') && + (parser->start[start + 1] != '0') && + pm_char_is_decimal_digit(parser->start[start + 1]) + ); } /** @@ -5272,9 +5257,9 @@ pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { * an appropriate error message to the parser. */ static inline void -pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - if (pm_token_is_numbered_parameter(start, end)) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start); +pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) { + if (pm_token_is_numbered_parameter(parser, start, length)) { + PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start); } } @@ -5284,11 +5269,11 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui */ static pm_local_variable_target_node_t * pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { - pm_refute_numbered_parameter(parser, location->start, location->end); + pm_refute_numbered_parameter(parser, location->start, location->length); pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t); *node = (pm_local_variable_target_node_t) { - .base = 
PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location), + .base = PM_NODE_INIT(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, ((pm_location_t) { .start = location->start, .length = location->length })), .name = name, .depth = depth }; @@ -5306,10 +5291,10 @@ pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t); *node = (pm_match_predicate_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern), + .base = PM_NODE_INIT(parser, PM_MATCH_PREDICATE_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)), .value = value, .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5325,10 +5310,10 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t * pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t); *node = (pm_match_required_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern), + .base = PM_NODE_INIT(parser, PM_MATCH_REQUIRED_NODE, 0, PM_LOCATION_INIT_NODES(value, pattern)), .value = value, .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5342,7 +5327,7 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) { pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t); *node = (pm_match_write_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call), + .base = PM_NODE_INIT(parser, PM_MATCH_WRITE_NODE, 0, PM_LOCATION_INIT_NODE(call)), .call = call, .targets = { 0 } }; @@ -5358,12 +5343,12 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t); *node = (pm_module_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, 
module_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_MODULE_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword)), .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals), - .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword), + .module_keyword_loc = TOK2LOC(parser, module_keyword), .constant_path = constant_path, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), + .end_keyword_loc = TOK2LOC(parser, end_keyword), .name = pm_parser_constant_id_token(parser, name) }; @@ -5378,7 +5363,7 @@ pm_multi_target_node_create(pm_parser_t *parser) { pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t); *node = (pm_multi_target_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0), + .base = PM_NODE_INIT(parser, PM_MULTI_TARGET_NODE, 0, PM_LOCATION_INIT_UNSET), .lefts = { 0 }, .rest = NULL, .rights = { 0 }, @@ -5405,7 +5390,7 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t if (node->rest == NULL) { node->rest = target; } else { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); pm_node_list_append(&node->rights, target); } } else if (node->rest == NULL) { @@ -5414,12 +5399,12 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t pm_node_list_append(&node->rights, target); } - if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) { - node->base.location.start = target->location.start; + if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) { + PM_NODE_START_SET_NODE(node, target); } - if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) { - node->base.location.end = target->location.end; + if (PM_NODE_LENGTH(node) == 
0 || (PM_NODE_END(node) < PM_NODE_END(target))) { + PM_NODE_LENGTH_SET_NODE(node, target); } } @@ -5427,18 +5412,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t * Set the opening of a MultiTargetNode node. */ static void -pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) { - node->base.location.start = lparen->start; - node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen); +pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) { + PM_NODE_START_SET_TOKEN(parser, node, lparen); + PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen); + node->lparen_loc = TOK2LOC(parser, lparen); } /** * Set the closing of a MultiTargetNode node. */ static void -pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) { - node->base.location.end = rparen->end; - node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen); +pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen); + node->rparen_loc = TOK2LOC(parser, rparen); } /** @@ -5450,13 +5436,13 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY); *node = (pm_multi_write_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value), + .base = PM_NODE_INIT(parser, PM_MULTI_WRITE_NODE, flags, PM_LOCATION_INIT_NODES(target, value)), .lefts = target->lefts, .rest = target->rest, .rights = target->rights, .lparen_loc = target->lparen_loc, .rparen_loc = target->rparen_loc, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -5476,12 +5462,8 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments pm_next_node_t *node = 
PM_NODE_ALLOC(parser, pm_next_node_t); *node = (pm_next_node_t) { - .base = ( - (arguments == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments) - ), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_NEXT_NODE, 0, (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), + .keyword_loc = TOK2LOC(parser, keyword), .arguments = arguments }; @@ -5497,7 +5479,7 @@ pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t); *node = (pm_nil_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -5513,9 +5495,9 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t); *node = (pm_no_keywords_parameter_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) + .base = PM_NODE_INIT(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, operator, keyword)), + .operator_loc = TOK2LOC(parser, operator), + .keyword_loc = TOK2LOC(parser, keyword) }; return node; @@ -5525,11 +5507,11 @@ pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *oper * Allocate and initialize a new NumberedParametersNode node. 
*/ static pm_numbered_parameters_node_t * -pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) { +pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) { pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t); *node = (pm_numbered_parameters_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location), + .base = PM_NODE_INIT(parser, PM_NUMBERED_PARAMETERS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .maximum = maximum }; @@ -5569,14 +5551,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to unsigned long value = strtoul(digits, &endptr, 10); if ((digits == endptr) || (*endptr != '\0')) { - pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); + pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL); value = 0; } xfree(digits); if ((errno == ERANGE) || (value > NTH_REF_MAX)) { - PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); + PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); value = 0; } @@ -5594,7 +5576,7 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t); *node = (pm_numbered_reference_read_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name), + .base = PM_NODE_INIT(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, name)), .number = pm_numbered_reference_read_node_number(parser, name) }; @@ -5609,10 +5591,10 @@ pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, c 
pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t); *node = (pm_optional_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value), + .base = PM_NODE_INIT(parser, PM_OPTIONAL_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, name, value)), .name = pm_parser_constant_id_token(parser, name), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .name_loc = TOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator), .value = value }; @@ -5629,10 +5611,10 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t); *node = (pm_or_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right), + .base = PM_NODE_INIT(parser, PM_OR_NODE, 0, PM_LOCATION_INIT_NODES(left, right)), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5646,7 +5628,7 @@ pm_parameters_node_create(pm_parser_t *parser) { pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t); *node = (pm_parameters_node_t) { - .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_PARAMETERS_NODE, 0, PM_LOCATION_INIT_UNSET), .rest = NULL, .keyword_rest = NULL, .block = NULL, @@ -5664,16 +5646,12 @@ pm_parameters_node_create(pm_parser_t *parser) { */ static void pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) { - if (params->base.location.start == NULL) { - params->base.location.start = param->location.start; - } else { - params->base.location.start = params->base.location.start < param->location.start ? 
params->base.location.start : param->location.start; + if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) { + PM_NODE_START_SET_NODE(params, param); } - if (params->base.location.end == NULL) { - params->base.location.end = param->location.end; - } else { - params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end; + if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) { + PM_NODE_LENGTH_SET_NODE(params, param); } } @@ -5750,7 +5728,7 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t); *node = (pm_program_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements), + .base = PM_NODE_INIT(parser, PM_PROGRAM_NODE, 0, PM_LOCATION_INIT_NODE(statements)), .locals = *locals, .statements = statements }; @@ -5766,10 +5744,10 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t); *node = (pm_parentheses_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing), + .base = PM_NODE_INIT(parser, PM_PARENTHESES_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5783,11 +5761,11 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t); *node = (pm_pinned_expression_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen), + .base = PM_NODE_INIT(parser, PM_PINNED_EXPRESSION_NODE, 0, 
PM_LOCATION_INIT_TOKENS(parser, operator, rparen)), .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen) + .operator_loc = TOK2LOC(parser, operator), + .lparen_loc = TOK2LOC(parser, lparen), + .rparen_loc = TOK2LOC(parser, rparen) }; return node; @@ -5801,9 +5779,9 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t); *node = (pm_pinned_variable_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable), + .base = PM_NODE_INIT(parser, PM_PINNED_VARIABLE_NODE, 0, PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable)), .variable = variable, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5817,11 +5795,11 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t); *node = (pm_post_execution_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing), + .base = PM_NODE_INIT(parser, PM_POST_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .keyword_loc = TOK2LOC(parser, keyword), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5835,11 +5813,11 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t); *node = (pm_pre_execution_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing), + .base = 
PM_NODE_INIT(parser, PM_PRE_EXECUTION_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) + .keyword_loc = TOK2LOC(parser, keyword), + .opening_loc = TOK2LOC(parser, opening), + .closing_loc = TOK2LOC(parser, closing) }; return node; @@ -5871,11 +5849,14 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope flags |= PM_NODE_FLAG_STATIC_LITERAL; } + uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left); + uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right); + *node = (pm_range_node_t) { - .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, (left == NULL ? operator->start : left->location.start), (right == NULL ? operator->end : right->location.end)), + .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), .left = left, .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -5890,7 +5871,7 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t); *node = (pm_redo_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_REDO_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -5906,10 +5887,10 @@ pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_ pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL; *node = (pm_regular_expression_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - 
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_REGULAR_EXPRESSION_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = TOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -5932,7 +5913,7 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t); *node = (pm_required_parameter_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token), + .base = PM_NODE_INIT(parser, PM_REQUIRED_PARAMETER_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), .name = pm_parser_constant_id_token(parser, token) }; @@ -5947,9 +5928,9 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t); *node = (pm_rescue_modifier_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression), + .base = PM_NODE_INIT(parser, PM_RESCUE_MODIFIER_NODE, 0, PM_LOCATION_INIT_NODES(expression, rescue_expression)), .expression = expression, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .keyword_loc = TOK2LOC(parser, keyword), .rescue_expression = rescue_expression }; @@ -5964,8 +5945,8 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t); *node = (pm_rescue_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_RESCUE_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)), + .keyword_loc = TOK2LOC(parser, keyword), .operator_loc = { 0 }, .then_keyword_loc = { 0 }, .reference = NULL, @@ -5978,8 +5959,8 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { } static 
inline void -pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) { - node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); +pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) { + node->operator_loc = TOK2LOC(parser, operator); } /** @@ -5988,7 +5969,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) static void pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) { node->reference = reference; - node->base.location.end = reference->location.end; + PM_NODE_LENGTH_SET_NODE(node, reference); } /** @@ -5998,7 +5979,7 @@ static void pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) { node->statements = statements; if (pm_statements_node_body_length(statements) > 0) { - node->base.location.end = statements->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, statements); } } @@ -6008,7 +5989,7 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat static void pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) { node->subsequent = subsequent; - node->base.location.end = subsequent->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, subsequent); } /** @@ -6017,7 +5998,7 @@ pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subseque static void pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) { pm_node_list_append(&node->exceptions, exception); - node->base.location.end = exception->location.end; + PM_NODE_LENGTH_SET_NODE(node, exception); } /** @@ -6028,14 +6009,10 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t); *node = (pm_rest_parameter_node_t) { - .base = ( - (name->type == PM_TOKEN_NOT_PROVIDED) - ? 
PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator) - : PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name) - ), - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) + .base = PM_NODE_INIT(parser, PM_REST_PARAMETER_NODE, 0, (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name)), + .name = name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + .name_loc = NTOK2LOC(parser, name), + .operator_loc = TOK2LOC(parser, operator) }; return node; @@ -6050,7 +6027,7 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t); *node = (pm_retry_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_RETRY_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6064,12 +6041,8 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t); *node = (pm_return_node_t) { - .base = ( - (arguments == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments) - ), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_RETURN_NODE, 0, (arguments == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments)), + .keyword_loc = TOK2LOC(parser, keyword), .arguments = arguments }; @@ -6085,7 +6058,7 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t); *node = (pm_self_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token) + .base = PM_NODE_INIT(parser, PM_SELF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6099,7 +6072,7 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t); *node = (pm_shareable_constant_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write), + .base = PM_NODE_INIT(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, PM_LOCATION_INIT_NODE(write)), .write = write }; @@ -6114,13 +6087,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t); *node = (pm_singleton_class_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword), + .base = PM_NODE_INIT(parser, PM_SINGLETON_CLASS_NODE, 0, PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword)), .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .class_keyword_loc = TOK2LOC(parser, class_keyword), + .operator_loc = TOK2LOC(parser, operator), .expression = expression, .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) + .end_keyword_loc = TOK2LOC(parser, end_keyword) }; return node; @@ -6135,7 +6108,7 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, 
pm_source_encoding_node_t); *node = (pm_source_encoding_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6161,7 +6134,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) } *node = (pm_source_file_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword), + .base = PM_NODE_INIT(parser, PM_SOURCE_FILE_NODE, flags, PM_LOCATION_INIT_TOKEN(parser, file_keyword)), .filepath = parser->filepath }; @@ -6177,7 +6150,7 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t); *node = (pm_source_line_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6191,12 +6164,8 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t); *node = (pm_splat_node_t) { - .base = ( - (expression == NULL) - ? PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator) - : PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression) - ), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), + .base = PM_NODE_INIT(parser, PM_SPLAT_NODE, 0, (expression == NULL) ? 
PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression)), + .operator_loc = TOK2LOC(parser, operator), .expression = expression }; @@ -6211,7 +6180,7 @@ pm_statements_node_create(pm_parser_t *parser) { pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t); *node = (pm_statements_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0), + .base = PM_NODE_INIT(parser, PM_STATEMENTS_NODE, 0, PM_LOCATION_INIT_UNSET), .body = { 0 } }; @@ -6226,26 +6195,18 @@ pm_statements_node_body_length(pm_statements_node_t *node) { return node && node->body.size; } -/** - * Set the location of the given StatementsNode. - */ -static void -pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) { - node->base.location = (pm_location_t) { .start = start, .end = end }; -} - /** * Update the location of the statements node based on the statement that is * being added to the list. */ static inline void pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) { - if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) { - node->base.location.start = statement->location.start; + if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) { + PM_NODE_START_SET_NODE(node, statement); } - if (statement->location.end > node->base.location.end) { - node->base.location.end = statement->location.end; + if (PM_NODE_END(statement) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statement); } } @@ -6303,14 +6264,14 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, break; } - const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start); - const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end); + uint32_t start = PM_TOKEN_START(parser, opening == NULL ? 
content : opening); + uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing); *node = (pm_string_node_t) { - .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = NTOK2LOC(parser, closing), .unescaped = *string }; @@ -6344,14 +6305,12 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument assert(keyword->type == PM_TOKEN_KEYWORD_SUPER); pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t); - const uint8_t *end = pm_arguments_end(arguments); - if (end == NULL) { - assert(false && "unreachable"); - } + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); *node = (pm_super_node_t) { - .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) })), + .keyword_loc = TOK2LOC(parser, keyword), .lparen_loc = arguments->opening_loc, .arguments = arguments->arguments, .rparen_loc = arguments->closing_loc, @@ -6386,7 +6345,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -6406,7 +6365,7 @@ 
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca size_t width = encoding->char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -6466,13 +6425,13 @@ parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, con if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { if (!ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); } } else if (parser->encoding != modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name); if (modifier == 'n' && !ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } @@ -6483,18 +6442,18 @@ parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, con bool mixed_encoding = false; if (mixed_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) { 
// TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily. bool valid_string_in_modifier_encoding = true; if (!valid_string_in_modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now. if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source)); } } @@ -6513,7 +6472,7 @@ parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_str // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report. 
bool valid_unicode_range = true; if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source)); return flags; } @@ -6522,7 +6481,7 @@ parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_str if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) { // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the // following error message appearing twice. We do the same for compatibility. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); } /** @@ -6579,14 +6538,14 @@ static pm_symbol_node_t * pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) { pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); - const uint8_t *start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start); - const uint8_t *end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end); + uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 
PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing); *node = (pm_symbol_node_t) { - .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .value_loc = PM_LOCATION_TOKEN_VALUE(value), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .opening_loc = NTOK2LOC(parser, opening), + .value_loc = NTOK2LOC(parser, value), + .closing_loc = NTOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -6616,35 +6575,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open */ static pm_symbol_node_t * pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) { - pm_symbol_node_t *node; - - switch (token->type) { - case PM_TOKEN_LABEL: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + assert(token->type == PM_TOKEN_LABEL); - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; - node = pm_symbol_node_create(parser, &opening, &label, &closing); + pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; + pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing); - assert((label.end - label.start) >= 0); - pm_string_shared_init(&node->unescaped, label.start, label.end); - pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false)); - - break; - } - case PM_TOKEN_MISSING: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end }; 
- node = pm_symbol_node_create(parser, &opening, &label, &closing); - break; - } - default: - assert(false && "unreachable"); - node = NULL; - break; - } + assert((label.end - label.start) >= 0); + pm_string_shared_init(&node->unescaped, label.start, label.end); + pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false)); return node; } @@ -6657,8 +6596,8 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); *node = (pm_symbol_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING), - .value_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING, PM_LOCATION_INIT_UNSET), + .value_loc = { 0 }, .unescaped = { 0 } }; @@ -6670,21 +6609,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { * Check if the given node is a label in a hash. 
*/ static bool -pm_symbol_node_label_p(pm_node_t *node) { - const uint8_t *end = NULL; +pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) { + const pm_location_t *location = NULL; switch (PM_NODE_TYPE(node)) { - case PM_SYMBOL_NODE: - end = ((pm_symbol_node_t *) node)->closing_loc.end; + case PM_SYMBOL_NODE: { + const pm_symbol_node_t *cast = (pm_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; - case PM_INTERPOLATED_SYMBOL_NODE: - end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end; + } + case PM_INTERPOLATED_SYMBOL_NODE: { + const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; + } default: return false; } - return (end != NULL) && (end[-1] == ':'); + return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':'); } /** @@ -6695,14 +6642,19 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t); *new_node = (pm_symbol_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing), - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), .value_loc = node->content_loc, - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .closing_loc = TOK2LOC(parser, closing), .unescaped = node->unescaped }; - pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end }; + pm_token_t content = { + .type = PM_TOKEN_IDENTIFIER, + .start = parser->start + node->content_loc.start, + .end = parser->start + node->content_loc.start + node->content_loc.length + }; + 
pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true)); // We are explicitly _not_ using pm_node_destroy here because we don't want @@ -6731,7 +6683,7 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) { } *new_node = (pm_string_node_t) { - .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node), + .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, PM_LOCATION_INIT_NODE(node)), .opening_loc = node->opening_loc, .content_loc = node->value_loc, .closing_loc = node->closing_loc, @@ -6755,7 +6707,7 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); *node = (pm_true_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) + .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_TOKEN(parser, token)) }; return node; @@ -6769,7 +6721,7 @@ pm_true_node_synthesized_create(pm_parser_t *parser) { pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); *node = (pm_true_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL) + .base = PM_NODE_INIT(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, PM_LOCATION_INIT_UNSET) }; return node; @@ -6784,8 +6736,8 @@ pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) { pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t); *node = (pm_undef_node_t) { - .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(token), + .base = PM_NODE_INIT(parser, PM_UNDEF_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, token)), + .keyword_loc = TOK2LOC(parser, token), .names = { 0 } }; @@ -6797,7 +6749,7 @@ pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static void pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) { - node->base.location.end = name->location.end; + 
PM_NODE_LENGTH_SET_NODE(node, name); pm_node_list_append(&node->names, name); } @@ -6812,10 +6764,10 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t pm_node_t *end = statements == NULL ? predicate : UP(statements); *node = (pm_unless_node_t) { - .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end)), + .keyword_loc = TOK2LOC(parser, keyword), .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), + .then_keyword_loc = NTOK2LOC(parser, then_keyword), .statements = statements, .else_clause = NULL, .end_keyword_loc = { 0 } @@ -6836,8 +6788,8 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_statements_node_body_append(parser, statements, statement, true); *node = (pm_unless_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword), + .base = PM_NODE_INIT(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, PM_LOCATION_INIT_NODES(statement, predicate)), + .keyword_loc = TOK2LOC(parser, unless_keyword), .predicate = predicate, .then_keyword_loc = { 0 }, .statements = statements, @@ -6849,9 +6801,9 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const } static inline void -pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) { - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); - node->base.location.end = end_keyword->end; +pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) { + node->end_keyword_loc = TOK2LOC(parser, end_keyword); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); } /** @@ -6866,7 +6818,7 @@ 
pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen // All of the block exits that we want to remove should be within the // statements, and since we are modifying the statements, we shouldn't have // to check the end location. - const uint8_t *start = statements->base.location.start; + uint32_t start = statements->base.location.start; for (size_t index = parser->current_block_exits->size; index > 0; index--) { pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1]; @@ -6886,10 +6838,10 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); *node = (pm_until_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), + .keyword_loc = TOK2LOC(parser, keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .closing_loc = TOK2LOC(parser, closing), .predicate = predicate, .statements = statements }; @@ -6907,8 +6859,8 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm pm_loop_modifier_block_exits(parser, statements); *node = (pm_until_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_UNTIL_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)), + .keyword_loc = TOK2LOC(parser, keyword), .do_keyword_loc = { 0 }, .closing_loc = { 0 }, .predicate = predicate, @@ -6926,8 +6878,8 @@ pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) { pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t); *node = (pm_when_node_t) { - 
.base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_WHEN_NODE, 0, PM_LOCATION_INIT_TOKEN(parser, keyword)), + .keyword_loc = TOK2LOC(parser, keyword), .statements = NULL, .then_keyword_loc = { 0 }, .conditions = { 0 } @@ -6941,7 +6893,7 @@ pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) { */ static void pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) { - node->base.location.end = condition->location.end; + PM_NODE_LENGTH_SET_NODE(node, condition); pm_node_list_append(&node->conditions, condition); } @@ -6949,9 +6901,9 @@ pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) { * Set the location of the then keyword of a when node. */ static inline void -pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) { - node->base.location.end = then_keyword->end; - node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword); +pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword); + node->then_keyword_loc = TOK2LOC(parser, then_keyword); } /** @@ -6959,8 +6911,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k */ static void pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) { - if (statements->base.location.end > node->base.location.end) { - node->base.location.end = statements->base.location.end; + if (PM_NODE_END(statements) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statements); } node->statements = statements; @@ -6975,10 +6927,10 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, 
flags, keyword, closing), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_TOKENS(parser, keyword, closing)), + .keyword_loc = TOK2LOC(parser, keyword), + .do_keyword_loc = NTOK2LOC(parser, do_keyword), + .closing_loc = TOK2LOC(parser, closing), .predicate = predicate, .statements = statements }; @@ -6996,8 +6948,8 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm pm_loop_modifier_block_exits(parser, statements); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, flags, PM_LOCATION_INIT_NODES(statements, predicate)), + .keyword_loc = TOK2LOC(parser, keyword), .do_keyword_loc = { 0 }, .closing_loc = { 0 }, .predicate = predicate, @@ -7015,10 +6967,10 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t); *node = (pm_while_node_t) { - .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0), - .keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), + .base = PM_NODE_INIT(parser, PM_WHILE_NODE, 0, PM_LOCATION_INIT_UNSET), + .keyword_loc = { 0 }, + .do_keyword_loc = { 0 }, + .closing_loc = { 0 }, .predicate = predicate, .statements = statements }; @@ -7035,10 +6987,10 @@ pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t); *node = (pm_x_string_node_t) { - .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc 
= PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), + .base = PM_NODE_INIT(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, PM_LOCATION_INIT_TOKENS(parser, opening, closing)), + .opening_loc = TOK2LOC(parser, opening), + .content_loc = TOK2LOC(parser, content), + .closing_loc = TOK2LOC(parser, closing), .unescaped = *unescaped }; @@ -7060,20 +7012,22 @@ static pm_yield_node_t * pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) { pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t); - const uint8_t *end; - if (rparen_loc->start != NULL) { - end = rparen_loc->end; + uint32_t start = PM_TOKEN_START(parser, keyword); + uint32_t end; + + if (rparen_loc->length > 0) { + end = PM_LOCATION_END(rparen_loc); } else if (arguments != NULL) { - end = arguments->base.location.end; - } else if (lparen_loc->start != NULL) { - end = lparen_loc->end; + end = PM_NODE_END(arguments); + } else if (lparen_loc->length > 0) { + end = PM_LOCATION_END(lparen_loc); } else { - end = keyword->end; + end = PM_TOKEN_END(parser, keyword); } *node = (pm_yield_node_t) { - .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), + .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, ((pm_location_t) { .start = start, .length = U32(end - start) })), + .keyword_loc = TOK2LOC(parser, keyword), .lparen_loc = *lparen_loc, .arguments = arguments, .rparen_loc = *rparen_loc @@ -7117,25 +7071,33 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { */ static inline void pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads); + pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - 
start), reads); } /** * Add a local variable from a location to the current scope. */ static pm_constant_id_t -pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end); +pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads); return constant_id; } +/** + * Add a local variable from a location to the current scope. + */ +static inline pm_constant_id_t +pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) { + return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads); +} + /** * Add a local variable from a token to the current scope. */ static inline pm_constant_id_t pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) { - return pm_parser_local_add_location(parser, token->start, token->end, reads); + return pm_parser_local_add_raw(parser, token->start, token->end, reads); } /** @@ -7169,7 +7131,7 @@ static bool pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) { // We want to check whether the parameter name is a numbered parameter or // not. - pm_refute_numbered_parameter(parser, name->start, name->end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name)); // Otherwise we'll fetch the constant id for the parameter name and check // whether it's already in the current scope. @@ -7434,7 +7396,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) { // issue because we didn't understand the encoding that the user was // trying to use. 
In this case we'll keep using the default encoding but // add an error to the parser to indicate an unsuccessful parse. - pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); + pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); } } @@ -7602,7 +7564,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7629,7 +7591,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7664,7 +7626,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { } else { PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -7682,10 +7644,8 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // Allocate a new magic comment node to append to the parser's list. 
pm_magic_comment_t *magic_comment; if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) { - magic_comment->key_start = key_start; - magic_comment->value_start = value_start; - magic_comment->key_length = (uint32_t) key_length; - magic_comment->value_length = value_length; + magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) }; + magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length }; pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); } } @@ -7923,7 +7883,7 @@ static inline void pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) { if (invalid != NULL) { pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER; - pm_parser_err(parser, invalid, invalid + 1, diag_id); + pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id); } } @@ -8108,7 +8068,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { const uint8_t *fraction_start = parser->current.end; const uint8_t *fraction_end = parser->current.end + 2; fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end); - pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION); + pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION); } return type; @@ -8208,7 +8168,7 @@ lex_global_variable(pm_parser_t *parser) { // $0 isn't allowed to be followed by anything. pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? 
PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -8245,8 +8205,8 @@ lex_global_variable(pm_parser_t *parser) { // If we get here, then we have a $ followed by something that // isn't recognized as a global variable. pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start); + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -8445,8 +8405,8 @@ current_token_starts_line(pm_parser_t *parser) { * handle interpolation. This function performs that check. It returns a token * type representing what it found. Those cases are: * - * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The - * caller should keep lexing. + * * 0 - No interpolation was found at this point. The caller should keep + * lexing. * * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The * caller should return this token type. * * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller @@ -8463,9 +8423,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { return PM_TOKEN_STRING_CONTENT; } - // Now we'll check against the character that follows the #. 
If it constitutes - // valid interplation, we'll handle that, otherwise we'll return - // PM_TOKEN_NOT_PROVIDED. + // Now we'll check against the character that follows the #. If it + // constitutes valid interplation, we'll handle that, otherwise we'll return + // 0. switch (pound[1]) { case '@': { // In this case we may have hit an embedded instance or class variable. @@ -8499,7 +8459,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // string content. This is like if we get "#@-". In this case the caller // should keep lexing. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } case '$': // In this case we may have hit an embedded global variable. If there's @@ -8549,7 +8509,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // In this case we've hit a #$ that does not indicate a global variable. // In this case we'll continue lexing past it. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; case '{': // In this case it's the start of an embedded expression. If we have // already consumed content, then we need to return that content as string @@ -8573,7 +8533,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // mark that by returning the not provided token type. This tells the // consumer to keep lexing forward. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } } @@ -8628,9 +8588,9 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const // codepoint and not a surrogate pair. 
if (value >= 0xD800 && value <= 0xDFFF) { if (error_location != NULL) { - pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE); } else { - pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE); } return 0xFFFD; } @@ -8658,14 +8618,14 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla // literal. if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) { if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); } parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY; } if (!pm_buffer_append_unicode_codepoint(buffer, value)) { - pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE); pm_buffer_append_byte(buffer, 0xEF); pm_buffer_append_byte(buffer, 0xBF); pm_buffer_append_byte(buffer, 0xBD); @@ -8680,7 +8640,7 @@ static inline void escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) { if (byte >= 0x80) { if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); } parser->explicit_encoding = parser->encoding; @@ -8751,7 +8711,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, 
uint8_t flag, const char *t PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_CHARACTER, FLAG(flags), FLAG(flag), @@ -8879,7 +8839,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (parser->current.end == parser->end) { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } else if (peek(parser) == '{') { const uint8_t *unicode_codepoints_start = parser->current.end - 2; parser->current.end++; @@ -8908,7 +8868,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (hexadecimal_length > 6) { // \u{nnnn} character literal allows only 1-6 hexadecimal digits - pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG); + pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG); } else if (hexadecimal_length == 0) { // there are not hexadecimal characters @@ -8918,8 +8878,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // error instead of us. 
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE); - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } return; @@ -8940,11 +8900,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // ?\u{nnnn} character literal should contain only one codepoint // and cannot be like ?\u{nnnn mmmm}. if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) { - pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); + pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); } if (parser->current.end == parser->end) { - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); } else if (peek(parser) == '}') { parser->current.end++; } else { @@ -8954,7 +8914,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // instead of us. 
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } } @@ -8969,7 +8929,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } } else if (length == 4) { uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL); @@ -9018,7 +8978,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9054,7 +9014,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9075,7 +9035,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t 
*regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9094,7 +9054,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9112,7 +9072,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } @@ -9128,7 +9088,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9147,7 +9107,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - 
parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } @@ -9167,7 +9127,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } if (parser->current.end < parser->end) { @@ -9280,7 +9240,7 @@ lex_at_variable(pm_parser_t *parser) { } size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); } else { pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? 
PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE; pm_parser_err_token(parser, &parser->current, diag_id); @@ -9315,7 +9275,7 @@ parser_comment(pm_parser_t *parser, pm_comment_type_t type) { *comment = (pm_comment_t) { .type = type, - .location = { parser->current.start, parser->current.end } + .location = TOK2LOC(parser, &parser->current) }; return comment; @@ -9334,7 +9294,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9342,6 +9302,7 @@ lex_embdoc(pm_parser_t *parser) { parser_lex_callback(parser); // Now, create a comment that is going to be attached to the parser. + const uint8_t *comment_start = parser->current.start; pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC); if (comment == NULL) return PM_TOKEN_EOF; @@ -9367,14 +9328,14 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } parser->current.type = PM_TOKEN_EMBDOC_END; parser_lex_callback(parser); - comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EMBDOC_END; @@ -9387,7 +9348,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -9397,7 +9358,7 @@ lex_embdoc(pm_parser_t *parser) { pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM); - 
comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EOF; @@ -9701,7 +9662,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) { parser_flush_heredoc_end(parser); } else { // Otherwise, we'll add the newline to the list of newlines. - pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + U32(eol_length)); } uint8_t delimiter = *parser->current.end; @@ -9786,7 +9747,7 @@ parser_lex(pm_parser_t *parser) { if (match_eol_offset(parser, 1)) { chomping = false; } else { - pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); + pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); parser->current.end++; space_seen = true; } @@ -9799,7 +9760,7 @@ parser_lex(pm_parser_t *parser) { parser->heredoc_end = NULL; } else { parser->current.end += eol_length + 1; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); space_seen = true; } } else if (pm_char_is_inline_whitespace(*parser->current.end)) { @@ -9893,7 +9854,7 @@ parser_lex(pm_parser_t *parser) { } if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } @@ -10092,7 +10053,7 @@ parser_lex(pm_parser_t *parser) { // , case ',': if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, 
pm_token_type_human(parser->current.type)); } lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); @@ -10218,7 +10179,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR_STAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10243,7 +10204,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10369,7 +10330,7 @@ parser_lex(pm_parser_t *parser) { bool ident_error = false; if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) { - pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER); + pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER); ident_error = true; } @@ -10402,7 +10363,7 @@ parser_lex(pm_parser_t *parser) { } else { // Otherwise, we want to indicate that the body of the // heredoc starts on the character after the next newline. 
- pm_newline_list_append(&parser->newline_list, body_start); + pm_newline_list_append(&parser->newline_list, U32(body_start - parser->start + 1)); body_start++; } @@ -10421,7 +10382,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); } if (lex_state_operator_p(parser)) { @@ -10547,7 +10508,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_UAMPERSAND; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -10623,7 +10584,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -10664,7 +10625,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -10763,7 +10724,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); + 
PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); } if (lex_state_operator_p(parser)) { @@ -10948,7 +10909,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); } lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG); @@ -10984,40 +10945,40 @@ parser_lex(pm_parser_t *parser) { // token after adding an appropriate error message. if (!width) { if (*parser->current.start >= 0x80) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); } else if (*parser->current.start == '\\') { switch (peek_at(parser, parser->current.start + 1)) { case ' ': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); break; case '\f': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); break; case '\t': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); break; case '\v': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); + 
PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); break; case '\r': if (peek_at(parser, parser->current.start + 2) != '\n') { parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); break; } PRISM_FALLTHROUGH default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); break; } } else if (char_is_ascii_printable(*parser->current.start)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); } goto lex_next_token; @@ -11043,15 +11004,15 @@ parser_lex(pm_parser_t *parser) { // correct column information for it. 
const uint8_t *cursor = parser->current.end; while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { - pm_newline_list_append(&parser->newline_list, cursor++); + pm_newline_list_append(&parser->newline_list, U32(++cursor - parser->start)); } parser->current.end = parser->end; parser->current.type = PM_TOKEN___END__; parser_lex_callback(parser); - parser->data_loc.start = parser->current.start; - parser->data_loc.end = parser->current.end; + parser->data_loc.start = PM_TOKEN_START(parser, &parser->current); + parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current); LEX(PM_TOKEN_EOF); } @@ -11076,7 +11037,7 @@ parser_lex(pm_parser_t *parser) { !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) && (type == PM_TOKEN_IDENTIFIER) && ((pm_parser_local_depth(parser, &parser->current) != -1) || - pm_token_is_numbered_parameter(parser->current.start, parser->current.end)) + pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))) ) { lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL); } @@ -11104,7 +11065,7 @@ parser_lex(pm_parser_t *parser) { whitespace += 1; } } else { - whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list); + whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } if (whitespace > 0) { @@ -11219,7 +11180,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. 
- pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11247,7 +11208,7 @@ parser_lex(pm_parser_t *parser) { if (*breakpoint == '#') { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something // that looked like an interpolated class or instance variable // like "#@" but wasn't actually. In this case we'll just skip @@ -11357,7 +11318,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11404,7 +11365,7 @@ parser_lex(pm_parser_t *parser) { // If we've hit a newline, then we need to track that in // the list of newlines. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; @@ -11452,7 +11413,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11499,7 +11460,7 @@ parser_lex(pm_parser_t *parser) { // interpolation. 
pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class or // instance variable like "#@" but wasn't actually. In @@ -11617,7 +11578,7 @@ parser_lex(pm_parser_t *parser) { // would have already have added the newline to the // list. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current)); } } else { parser->current.end = breakpoint + 1; @@ -11669,7 +11630,7 @@ parser_lex(pm_parser_t *parser) { // for the terminator in case the terminator is a // newline character. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); break; @@ -11723,7 +11684,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_newline_list_append(&parser->newline_list, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -11752,7 +11713,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something that // looked like an interpolated class or instance variable like "#@" // but wasn't actually. 
In this case we'll just skip to the next @@ -11852,7 +11813,7 @@ parser_lex(pm_parser_t *parser) { (memcmp(terminator_start, ident_start, ident_length) == 0) ) { if (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); } parser->current.end = terminator_end; @@ -11924,7 +11885,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_newline_list_append(&parser->newline_list, U32(breakpoint - parser->start + 1)); // If we have a - or ~ heredoc, then we can match after // some leading whitespace. @@ -12044,7 +12005,7 @@ parser_lex(pm_parser_t *parser) { const uint8_t *end = parser->current.end; if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, end); + pm_newline_list_append(&parser->newline_list, U32(end - parser->start + 1)); } // Here we want the buffer to only @@ -12076,7 +12037,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class // or instance variable like "#@" but wasn't @@ -12390,10 +12351,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) { if (accept1(parser, type)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -12405,10 +12366,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di if (accept2(parser, type1, type2)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, 
diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -12422,7 +12383,7 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide } else { pm_parser_err_heredoc_term(parser, ident_start, ident_length); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } @@ -12436,10 +12397,11 @@ static void expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) { if (accept1(parser, type)) return; - pm_parser_err(parser, opening->start, opening->end, diag_id); + const uint8_t *start = opening->start; + pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } static pm_node_t * @@ -12663,7 +12625,7 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) { default: break; } - pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end); + pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0); pm_node_destroy(parser, target); @@ -12725,8 +12687,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p target->type = PM_GLOBAL_VARIABLE_TARGET_NODE; return target; case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start); + if (pm_token_is_numbered_parameter(parser, 
PM_NODE_START(target), PM_NODE_LENGTH(target))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target)); pm_node_unreference(parser, target); } @@ -12777,10 +12739,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -12794,15 +12756,14 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. 
- const pm_location_t message_loc = call->message_loc; - - pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0); + pm_location_t message_loc = call->message_loc; + pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0); pm_node_destroy(parser, target); return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0)); } - if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) { + if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION); } @@ -12910,22 +12871,21 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod case PM_LOCAL_VARIABLE_READ_NODE: { pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target; + pm_location_t location = target->location; pm_constant_id_t name = local_read->name; - pm_location_t name_loc = target->location; - uint32_t depth = local_read->depth; pm_scope_t *scope = pm_parser_scope_find(parser, depth); - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) { pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? 
PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED; - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start); + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target)); pm_node_unreference(parser, target); } pm_locals_unread(&scope->locals, name); pm_node_destroy(parser, target); - return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator)); + return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator)); } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); @@ -12962,10 +12922,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -12979,19 +12939,19 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. 
- const pm_location_t message = call->message_loc; + pm_location_t message_loc = call->message_loc; - pm_parser_local_add_location(parser, message.start, message.end, 0); + pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length); + pm_parser_local_add_location(parser, &message_loc, 0); pm_node_destroy(parser, target); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end); - target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator)); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc)); + target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator)); - pm_refute_numbered_parameter(parser, message.start, message.end); return target; } - if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) { + if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { // When we get here, we have a method call, because it was // previously marked as a method call but now we have an =. 
This // looks like: @@ -13006,8 +12966,8 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod call->arguments = arguments; pm_arguments_node_arguments_append(arguments, value); - call->base.location.end = arguments->base.location.end; - call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator); + PM_NODE_LENGTH_SET_NODE(call, arguments); + call->equal_loc = TOK2LOC(parser, operator); parse_write_name(parser, &call->name); pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); @@ -13025,11 +12985,11 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod } pm_arguments_node_arguments_append(call->arguments, value); - target->location.end = value->location.end; + PM_NODE_LENGTH_SET_NODE(target, value); // Replace the name with "[]=". call->name = pm_parser_constant_id_constant(parser, "[]=", 3); - call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator); + call->equal_loc = TOK2LOC(parser, operator); // Ensure that the arguments for []= don't contain keywords pm_index_arguments_check(parser, call->arguments, call->block); @@ -13080,7 +13040,7 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t default: break; } - pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1); + pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1); pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals); pm_node_destroy(parser, target); @@ -13242,9 +13202,9 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // This is an inlined version of accept1 because the error that we // want to add has varargs. If this happens again, we should // probably extract a helper function. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } @@ -13269,20 +13229,20 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { */ static void pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { - const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true); + const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start, parser->start_line, literals, node, true); if (duplicated != NULL) { pm_buffer_t buffer = { 0 }; - pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated); + pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start, parser->start_line, parser->encoding->name, duplicated); pm_diagnostic_list_append_format( &parser->warning_list, duplicated->location.start, - duplicated->location.end, + duplicated->location.length, PM_WARN_DUPLICATED_HASH_KEY, (int) pm_buffer_length(&buffer), pm_buffer_value(&buffer), - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(node), parser->start_line).line ); pm_buffer_free(&buffer); @@ -13297,14 +13257,14 @@ static void pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { pm_node_t *previous; - if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) { + if ((previous = pm_static_literals_add(&parser->newline_list, parser->start, 
parser->start_line, literals, node, false)) != NULL) { pm_diagnostic_list_append_format( &parser->warning_list, - node->location.start, - node->location.end, + PM_NODE_START(node), + PM_NODE_LENGTH(node), PM_WARN_DUPLICATED_WHEN_CLAUSE, - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line, - pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(node), parser->start_line).line, + pm_newline_list_line_column(&parser->newline_list, PM_NODE_START(previous), parser->start_line).line ); } } @@ -13350,7 +13310,6 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label)); pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator = not_provided(parser); pm_node_t *value = NULL; if (token_begins_expression_p(parser->current.type)) { @@ -13364,7 +13323,7 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }; if (identifier.end[-1] == '!' 
|| identifier.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); } else { depth = pm_parser_local_depth(parser, &identifier); } @@ -13376,11 +13335,11 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod } } - value->location.end++; + value->location.length++; value = UP(pm_implicit_node_create(parser, value)); } - element = UP(pm_assoc_node_create(parser, key, &operator, value)); + element = UP(pm_assoc_node_create(parser, key, NULL, value)); break; } default: { @@ -13394,16 +13353,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator; - if (pm_symbol_node_label_p(key)) { - operator = not_provided(parser); - } else { + pm_token_t operator = { 0 }; + if (!pm_symbol_node_label_p(parser, key)) { expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET); operator = parser->previous; } pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - element = UP(pm_assoc_node_create(parser, key, &operator, value)); + element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value)); break; } } @@ -13434,14 +13391,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod static inline bool argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) { - if (pm_symbol_node_label_p(argument)) { + if (pm_symbol_node_label_p(parser, argument)) { return true; } switch (PM_NODE_TYPE(argument)) { case PM_CALL_NODE: { pm_call_node_t *cast = (pm_call_node_t *) argument; - if (cast->opening_loc.start == NULL && cast->arguments != NULL) { + if (cast->opening_loc.length == 0 && cast->arguments != NULL) { if (PM_NODE_FLAG_P(cast->arguments, 
PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) { return false; } @@ -13560,7 +13517,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1)); if (parsed_bare_hash) { - pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); + pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); } argument = UP(pm_splat_node_create(parser, &operator, expression)); @@ -13585,7 +13542,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // ... operator. if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) { pm_range_node_t *range = (pm_range_node_t *) right; - pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR); + pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR); } argument = UP(pm_range_node_create(parser, NULL, &operator, right)); @@ -13613,16 +13570,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for bool contains_keywords = false; bool contains_keyword_splat = false; - if (argument_allowed_for_bare_hash(parser, argument)){ + if (argument_allowed_for_bare_hash(parser, argument)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH); } - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser); @@ -13634,7 +13589,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // Finish 
parsing the one we are part way through. pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - argument = UP(pm_assoc_node_create(parser, argument, &operator, value)); + argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value)); pm_keyword_hash_node_elements_append(bare_hash, argument); argument = UP(bare_hash); @@ -13691,7 +13646,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // `foo(bar 1 do end, 2)` should be rejected. if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) { pm_call_node_t *call = (pm_call_node_t *) argument; - if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) { + if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) { pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); break; } @@ -13723,7 +13678,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER); pm_multi_target_node_t *node = pm_multi_target_node_create(parser); - pm_multi_target_node_opening_set(node, &parser->previous); + pm_multi_target_node_opening_set(parser, node, &parser->previous); do { pm_node_t *param; @@ -13771,7 +13726,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER); - pm_multi_target_node_closing_set(node, &parser->previous); + pm_multi_target_node_closing_set(parser, node, &parser->previous); return node; } @@ -13887,7 +13842,7 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -13895,11 +13850,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, 
&name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK; } - pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator); + pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator); if (repeated) { pm_node_flag_set_repeated_parameter(UP(param)); } @@ -13994,7 +13948,7 @@ parse_parameters( // reads of that parameter, then we need to warn that we // have a circular definition. if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR); } context_pop(parser); @@ -14034,9 +13988,9 @@ parse_parameters( local.end -= 1; if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) { - pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT); + pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT); } else if (local.end[-1] == '!' 
|| local.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); } bool repeated = pm_parser_parameter_name_check(parser, &local); @@ -14085,7 +14039,7 @@ parse_parameters( if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser); if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR); } param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value)); @@ -14120,7 +14074,7 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14128,11 +14082,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS; } - pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name)); + pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14162,7 +14115,7 @@ parse_parameters( param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous)); } else { - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14170,11 +14123,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS; } - param = 
UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name)); + param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14236,7 +14188,7 @@ parse_parameters( pm_do_loop_stack_pop(parser); // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`. - if (params->base.location.start == params->base.location.end) { + if (PM_NODE_START(params) == PM_NODE_END(params)) { pm_node_destroy(parser, UP(params)); return NULL; } @@ -14260,7 +14212,7 @@ token_newline_index(const pm_parser_t *parser) { // start of a heredoc, so we cannot rely on looking at the previous // offset of the newline list, and instead must go through the whole // process of a binary search for the line number. - return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0); + return (size_t) pm_newline_list_line(&parser->newline_list, PM_TOKEN_START(parser, &parser->current), 0); } } @@ -14334,8 +14286,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind // Otherwise, add a warning. PM_PARSER_WARN_FORMAT( parser, - closing_token->start, - closing_token->end, + PM_TOKEN_START(parser, closing_token), + PM_TOKEN_LENGTH(closing_token), PM_WARN_INDENTATION_MISMATCH, (int) (closing_token->end - closing_token->start), (const char *) closing_token->start, @@ -14375,7 +14327,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // we're going to have an empty list of exceptions to rescue (which // implies StandardError). 
parser_lex(parser); - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); @@ -14405,7 +14357,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // If we hit a `=>` then we're going to parse the exception variable. Once // we've done that, we'll break out of the loop and parse the statements. if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) { - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); @@ -14420,11 +14372,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous); + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM); - rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous); + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); } if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) { @@ -14462,11 +14414,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // since we won't know the end until we've found all subsequent // clauses. This sets the end location on all rescues once we know it. 
if (current != NULL) { - const uint8_t *end_to_set = current->base.location.end; pm_rescue_node_t *clause = parent_node->rescue_clause; while (clause != NULL) { - clause->base.location.end = end_to_set; + PM_NODE_LENGTH_SET_NODE(clause, current); clause = clause->subsequent; } } @@ -14547,10 +14498,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ if (match1(parser, PM_TOKEN_KEYWORD_END)) { if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false); - pm_begin_node_end_keyword_set(parent_node, &parser->current); + pm_begin_node_end_keyword_set(parser, parent_node, &parser->current); } else { - pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_begin_node_end_keyword_set(parent_node, &end_keyword); + pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end }; + pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword); } } @@ -14560,11 +14511,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ */ static pm_begin_node_t * parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) { - pm_token_t begin_keyword = not_provided(parser); - pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements); - + pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements); parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1)); - node->base.location.start = start; + + node->base.location.start = U32(start - parser->start); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current); return node; } @@ -14602,7 +14553,7 @@ parse_block_parameters( } pm_block_parameters_node_t *block_parameters = 
pm_block_parameters_node_create(parser, parameters, opening); - if ((opening->type != PM_TOKEN_NOT_PROVIDED)) { + if (opening != NULL) { accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_SEMICOLON)) { @@ -14715,8 +14666,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK); } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) { pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK); - } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0')); + } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0')); } else { assert(false && "unreachable"); } @@ -14735,9 +14686,7 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) { scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER; } - - const pm_location_t location = { .start = opening->start, .end = closing->end }; - return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter)); + return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter)); } if (it_parameter) { @@ -14773,7 +14722,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) { expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM); } - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); } accept1(parser, PM_TOKEN_NEWLINE); @@ -14823,22 +14772,22 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept if 
(accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { found |= true; - arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->opening_loc = TOK2LOC(parser, &parser->previous); if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } else { pm_accepts_block_stack_push(parser, true); parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1)); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } pm_accepts_block_stack_pop(parser); - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) { found |= true; @@ -14853,7 +14802,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept // then we have a trailing comma where we need to check whether it is // allowed or not. 
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); } pm_accepts_block_stack_pop(parser); @@ -15157,7 +15106,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); pm_token_t keyword = parser->previous; - pm_token_t then_keyword = not_provided(parser); + pm_token_t then_keyword = { 0 }; pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1)); pm_statements_node_t *statements = NULL; @@ -15169,15 +15118,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); } - pm_token_t end_keyword = not_provided(parser); pm_node_t *parent = NULL; switch (context) { case PM_CONTEXT_IF: - parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword)); + parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); break; case PM_CONTEXT_UNLESS: - parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements)); + parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements)); break; default: assert(false && "unreachable"); @@ -15191,7 +15139,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl if (context == PM_CONTEXT_IF) { while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) { if (parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, 
PM_WARN_KEYWORD_EOL); } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); @@ -15205,7 +15153,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl pm_accepts_block_stack_pop(parser); accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword)); + pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); ((pm_if_node_t *) current)->subsequent = elsif; current = elsif; } @@ -15253,12 +15201,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl while (recursing) { switch (PM_NODE_TYPE(current)) { case PM_IF_NODE: - pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous); + pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous); current = ((pm_if_node_t *) current)->subsequent; recursing = current != NULL; break; case PM_ELSE_NODE: - pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous); + pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous); recursing = false; break; default: { @@ -15270,7 +15218,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl break; } case PM_CONTEXT_UNLESS: - pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous); + pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous); break; default: assert(false && "unreachable"); @@ -15385,10 +15333,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // "aaa #{bbb} #@ccc ddd" // ^^^^ ^ ^^^^ case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, 
&closing)); + pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_flag_set(node, parse_unescaped_encoding(parser)); parser_lex(parser); @@ -15423,9 +15368,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { parser->brace_nesting = brace_nesting; lex_state_set(parser, state); - expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END); - pm_token_t closing = parser->previous; // If this set of embedded statements only contains a single // statement, then Ruby does not consider it as a possible statement @@ -15434,7 +15377,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE); } - return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing)); + return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous)); } // Here the lexer has returned the beginning of an embedded variable. @@ -15490,7 +15433,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // missing node. 
default: expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID); - variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); break; } @@ -15522,9 +15465,7 @@ parse_operator_symbol_name(const pm_token_t *name) { static pm_node_t * parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) { - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL); const uint8_t *end = parse_operator_symbol_name(&parser->current); if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); @@ -15567,9 +15508,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s break; } - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15581,10 +15520,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s if (match1(parser, PM_TOKEN_STRING_END)) { if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); parser_lex(parser); + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->previous.start, + .end = parser->previous.start + }; - pm_token_t content = not_provided(parser); - pm_token_t closing = parser->previous; - return UP(pm_symbol_node_create(parser, &opening, &content, &closing)); + return UP(pm_symbol_node_create(parser, 
&opening, &content, &parser->previous)); } // Now we can parse the first part of the symbol. @@ -15615,7 +15557,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED); } - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); return UP(symbol); } @@ -15638,12 +15580,10 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s // interpolated string node, so that's what we'll do here. if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening); - pm_token_t bounds = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); pm_interpolated_symbol_node_append(symbol, part); - part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string)); + part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string)); pm_interpolated_symbol_node_append(symbol, part); if (next_state != PM_LEX_STATE_NONE) { @@ -15653,7 +15593,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s parser_lex(parser); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC); - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); return UP(symbol); } } else { @@ -15681,20 +15621,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s static inline pm_node_t * parse_undef_argument(pm_parser_t *parser, uint16_t depth) { switch (parser->current.type) { - case 
PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: case PM_TOKEN_METHOD_NAME: { parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15708,7 +15643,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { } default: pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -15721,10 +15656,8 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { static inline pm_node_t * parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { switch (parser->current.type) { - case PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, first ? 
PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: @@ -15732,10 +15665,7 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); @@ -15758,7 +15688,7 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { return UP(pm_global_variable_read_node_create(parser, &parser->previous)); default: pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -15770,7 +15700,7 @@ static pm_node_t * parse_variable(pm_parser_t *parser) { pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; - bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end); + bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) { return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false)); @@ -15840,7 +15770,7 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return 
parser->previous; case PM_TOKEN_IDENTIFIER: - pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)); parser_lex(parser); return parser->previous; case PM_CASE_OPERATOR: @@ -15848,8 +15778,8 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return parser->previous; default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); - return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); + return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end }; } } @@ -15977,10 +15907,8 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // If we get here, then we have an end of a label immediately // after a start. In that case we'll create an empty symbol // node. 
- pm_token_t content = parse_strings_empty_content(parser->previous.start); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous); - - pm_string_shared_init(&symbol->unescaped, content.start, content.end); + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous); + pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start); node = UP(symbol); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); @@ -15992,7 +15920,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (match1(parser, PM_TOKEN_EOF)) { unescaped = PM_STRING_EMPTY; - content = not_provided(parser); + content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start }; } else { unescaped = parser->current_string; expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT); @@ -16012,13 +15940,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // be able to contain all of the parts. 
if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_node_list_t parts = { 0 }; - - pm_token_t delimiters = not_provided(parser); - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); pm_node_list_append(&parts, part); do { - part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters)); + part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_list_append(&parts, part); parser_lex(parser); } while (match1(parser, PM_TOKEN_STRING_CONTENT)); @@ -16036,9 +15962,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 } else if (accept1(parser, PM_TOKEN_STRING_END)) { node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) { @@ -16061,10 +15987,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (!accept1(parser, PM_TOKEN_STRING_END)) { const uint8_t *location = parser->previous.end; if (location > parser->start && location[-1] == '\n') location--; - pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF); + pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + 
parser->previous.type = 0; } } else if (accept1(parser, PM_TOKEN_LABEL_END)) { node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true))); @@ -16073,10 +15999,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // If we get here, then we have interpolation so we'll need // to create a string or symbol node with interpolation. pm_node_list_t parts = { 0 }; - pm_token_t string_opening = not_provided(parser); - pm_token_t string_closing = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_node_list_append(&parts, part); @@ -16153,9 +16076,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 } concating = true; - pm_token_t bounds = not_provided(parser); - - pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds); + pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(container, current); current = UP(container); } @@ -16182,10 +16103,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag static void parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { // Skip this capture if it starts with an underscore. 
- if (peek_at(parser, location->start) == '_') return; + if (peek_at(parser, parser->start + location->start) == '_') return; if (pm_constant_id_list_includes(captures, capture)) { - pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); + pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE); } else { pm_constant_id_list_append(captures, capture); } @@ -16254,13 +16175,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16270,13 +16191,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = 
PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16286,13 +16207,13 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_HASH_PATTERN_NODE: { pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16324,18 +16245,17 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { // will check for that here. If they do, then we'll add it to the local // table since this pattern will cause it to become a local variable. 
if (accept1(parser, PM_TOKEN_IDENTIFIER)) { - pm_token_t identifier = parser->previous; - pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { - pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0); + pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); name = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&identifier), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) )); @@ -16368,10 +16288,10 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); value = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 
0 : depth) )); @@ -16414,22 +16334,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u static pm_node_t * parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) { const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; + const uint8_t *start = parser->start + PM_LOCATION_START(value_loc); + const uint8_t *end = parser->start + PM_LOCATION_END(value_loc); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); int depth = -1; - if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) { + if (pm_slice_is_valid_local(parser, start, end)) { depth = pm_parser_local_depth_constant_id(parser, constant_id); } else { - pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS); + pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS); - if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) { - PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start); + if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) { + PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start); } } if (depth == -1) { - pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0); + pm_parser_local_add(parser, constant_id, start, end, 0); } parse_pattern_capture(parser, captures, constant_id, value_loc); @@ -16449,7 +16371,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca */ static void parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t 
*node) { - if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) { + if (pm_static_literals_add(&parser->newline_list, parser->start, parser->start_line, keys, node, true) != NULL) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE); } } @@ -16469,7 +16391,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node rest = first_node; break; case PM_SYMBOL_NODE: { - if (pm_symbol_node_label_p(first_node)) { + if (pm_symbol_node_label_p(parser, first_node)) { parse_pattern_hash_key(parser, &keys, first_node); pm_node_t *value; @@ -16483,9 +16405,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value)); - + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); pm_node_list_append(&assocs, assoc); break; } @@ -16498,9 +16418,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? 
PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL; pm_parser_err_node(parser, first_node, diag_id); - pm_token_t operator = not_provided(parser); - pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end)); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value)); + pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node))); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); pm_node_list_append(&assocs, assoc); break; @@ -16536,12 +16455,16 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED); - } else if (!pm_symbol_node_label_p(key)) { + } else if (!pm_symbol_node_label_p(parser, key)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA); } + } else if (accept1(parser, PM_TOKEN_LABEL)) { + key = UP(pm_symbol_node_label_create(parser, &parser->previous)); } else { expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA); - key = UP(pm_symbol_node_label_create(parser, &parser->previous)); + + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end }; + key = UP(pm_symbol_node_create(parser, NULL, &label, NULL)); } parse_pattern_hash_key(parser, &keys, key); @@ -16551,14 +16474,13 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) { value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); } else { - value = UP(pm_missing_node_create(parser, key->location.end, key->location.end)); + value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0)); } } else { value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) 
(depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value)); if (rest != NULL) { pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST); @@ -16591,10 +16513,10 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); return UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) )); @@ -16620,12 +16542,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm switch (PM_NODE_TYPE(inner)) { case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16634,12 +16556,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - 
pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); return UP(pattern_node); } @@ -16681,10 +16603,10 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); break; default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); parser_lex(parser); - first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); break; } } @@ -16695,11 +16617,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening); pm_token_t closing = parser->previous; - node->base.location.start = opening.start; - node->base.location.end = closing.end; + PM_NODE_START_SET_TOKEN(parser, node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing); - node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + node->opening_loc = TOK2LOC(parser, &opening); + node->closing_loc = TOK2LOC(parser, &closing); } parser->pattern_matching_newlines = 
previous_pattern_matching_newlines; @@ -16719,7 +16641,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } default: { pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE); - pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end)); + pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); return UP(pm_range_node_create(parser, NULL, &operator, right)); } } @@ -16728,12 +16650,12 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1)); // If we found a label, we need to immediately return to the caller. - if (pm_symbol_node_label_p(node)) return node; + if (pm_symbol_node_label_p(parser, node)) return node; // Call nodes (arithmetic operations) are not allowed in patterns if (PM_NODE_TYPE(node) == PM_CALL_NODE) { pm_parser_err_node(parser, node, diag_id); - pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end); + pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node)); pm_node_unreference(parser, node); pm_node_destroy(parser, node); @@ -16771,7 +16693,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *variable = UP(parse_variable(parser)); if (variable == NULL) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE); variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0)); } @@ -16825,7 +16747,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // If we get here, then we have a pin operator followed by something // not understood. 
We'll create a missing node and return that. pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN); - pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end)); + pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } } @@ -16848,16 +16770,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } default: pm_parser_err_current(parser, diag_id); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } static bool parse_pattern_alternation_error_each(const pm_node_t *node, void *data) { switch (PM_NODE_TYPE(node)) { - case PM_LOCAL_VARIABLE_TARGET_NODE: - pm_parser_err((pm_parser_t *) data, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); + case PM_LOCAL_VARIABLE_TARGET_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); return false; + } default: return true; } @@ -16930,7 +16854,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p } default: { pm_parser_err_current(parser, diag_id); - pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); if (!alternation) { node = right; @@ -16957,10 +16881,10 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, 
&PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) ); @@ -17008,7 +16932,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // be dynamic symbols leading to hash patterns. node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); if (!(flags & PM_PARSE_PATTERN_TOP)) { @@ -17037,7 +16961,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // If we got a dynamic label symbol, then we need to treat it like the // beginning of a hash pattern. 
- if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); } @@ -17115,23 +17039,27 @@ parse_negative_numeric(pm_node_t *node) { case PM_INTEGER_NODE: { pm_integer_node_t *cast = (pm_integer_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value.negative = true; break; } case PM_FLOAT_NODE: { pm_float_node_t *cast = (pm_float_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value = -cast->value; break; } case PM_RATIONAL_NODE: { pm_rational_node_t *cast = (pm_rational_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->numerator.negative = true; break; } case PM_IMAGINARY_NODE: node->location.start--; + node->location.length++; parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric); break; default: @@ -17149,22 +17077,22 @@ static void pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { switch (diag_id) { case PM_ERR_HASH_KEY: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_type_human(parser->previous.type)); break; } case PM_ERR_HASH_VALUE: case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); break; } case PM_ERR_UNARY_RECEIVER: { const char *human = (parser->current.type == PM_TOKEN_EOF ? 
"end-of-input" : pm_token_type_human(parser->current.type)); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]); break; } case PM_ERR_UNARY_DISALLOWED: case PM_ERR_EXPECT_ARGUMENT: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_type_human(parser->current.type)); break; } default: @@ -17391,15 +17319,15 @@ typedef struct { static void parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) { parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data; - pm_location_t location; + pm_token_t location; if (callback_data->shared) { - location = (pm_location_t) { .start = start, .end = end }; + location = (pm_token_t) { .type = 0, .start = start, .end = end }; } else { - location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end }; + location = (pm_token_t) { .type = 0, .start = callback_data->start, .end = callback_data->end }; } - PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message); + PM_PARSER_ERR_FORMAT(callback_data->parser, PM_TOKEN_START(callback_data->parser, &location), PM_TOKEN_LENGTH(&location), PM_ERR_REGEXP_PARSE_ERROR, message); } /** @@ -17410,8 +17338,8 @@ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_ const pm_string_t *unescaped = &node->unescaped; parse_regular_expression_error_data_t error_data = { .parser = parser, - .start = node->base.location.start, - .end = node->base.location.end, + .start = parser->start + PM_NODE_START(node), + .end = parser->start + PM_NODE_END(node), .shared = unescaped->type == PM_STRING_SHARED }; @@ -17451,11 +17379,9 @@ 
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else { // If there was no comma, then we need to add a syntax // error. - const uint8_t *location = parser->previous.end; - PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); - - parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } } @@ -17494,7 +17420,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else { element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { + if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH); } @@ -17503,15 +17429,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_static_literals_t hash_keys = { 0 }; pm_hash_key_static_literals_add(parser, &hash_keys, element); - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value)); pm_keyword_hash_node_elements_append(hash, assoc); element = UP(hash); @@ -17531,12 +17455,12 @@ parse_expression_prefix(pm_parser_t *parser, 
pm_binding_power_t binding_power, b accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } - pm_array_node_close_set(array, &parser->previous); + pm_array_node_close_set(parser, array, &parser->previous); pm_accepts_block_stack_pop(parser); return UP(array); @@ -17618,20 +17542,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // multiple target node. pm_multi_target_node_t *multi_target; - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) { + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) { multi_target = (pm_multi_target_node_t *) statement; } else { multi_target = pm_multi_target_node_create(parser); pm_multi_target_node_targets_append(parser, multi_target, statement); } - pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); - - multi_target->lparen_loc = lparen_loc; - multi_target->rparen_loc = rparen_loc; - multi_target->base.location.start = lparen_loc.start; - multi_target->base.location.end = rparen_loc.end; + multi_target->lparen_loc = TOK2LOC(parser, &opening); + multi_target->rparen_loc = TOK2LOC(parser, &parser->previous); + PM_NODE_START_SET_TOKEN(parser, multi_target, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous); pm_node_t *result; if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { @@ -17682,7 +17603,7 @@ parse_expression_prefix(pm_parser_t 
*parser, pm_binding_power_t binding_power, b // If we didn't find a terminator and we didn't find a right // parenthesis, then this is a syntax error. if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); } // Parse each statement within the parentheses. @@ -17713,7 +17634,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else if (!match1(parser, PM_TOKEN_EOF)) { // If we're at the end of the file, then we're going to add // an error after this for the ) anyway. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); } } @@ -17737,9 +17658,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { - const uint8_t *offset = statement->location.end; + const uint8_t *offset = parser->start + PM_NODE_END(statement); pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; - pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset)); + pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0)); statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value)); statements->body.nodes[statements->body.size - 1] = statement; @@ -17785,12 +17706,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_accepts_block_stack_pop(parser); expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); - pm_hash_node_closing_loc_set(node, 
&parser->previous); + pm_hash_node_closing_loc_set(parser, node, &parser->previous); return UP(node); } case PM_TOKEN_CHARACTER_LITERAL: { - pm_token_t closing = not_provided(parser); pm_node_t *node = UP(pm_string_node_create_current_string( parser, &(pm_token_t) { @@ -17803,7 +17723,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b .start = parser->current.start + 1, .end = parser->current.end }, - &closing + NULL )); pm_node_flag_set(node, parse_unescaped_encoding(parser)); @@ -17953,11 +17873,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b call->closing_loc = arguments.closing_loc; call->block = arguments.block; - const uint8_t *end = pm_arguments_end(&arguments); - if (!end) { - end = call->message_loc.end; + const pm_location_t *end = pm_arguments_end(&arguments); + if (end == NULL) { + PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc); + } else { + PM_NODE_LENGTH_SET_LOCATION(call, end); } - call->base.location.end = end; } } else { // Otherwise, we know the identifier is in the local table. This @@ -17984,7 +17905,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // purposes of warnings. 
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)); - if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) { + if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) { pm_node_unreference(parser, node); } else { pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; @@ -18030,7 +17951,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); } - node->location.end = opening.end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening); } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) { // If we get here, then we tried to find something in the // heredoc but couldn't actually parse anything, so we'll just @@ -18038,7 +17959,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // // parse_string_part handles its own errors, so there is no need // for us to add one here. 
- node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { // If we get here, then the part that we parsed was plain string // content and we're at the end of the heredoc, so we can return @@ -18047,8 +17968,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_string_node_t *cast = (pm_string_node_t *) part; - cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current); + cast->opening_loc = TOK2LOC(parser, &opening); + cast->closing_loc = TOK2LOC(parser, &parser->current); cast->base.location = cast->opening_loc; if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { @@ -18082,7 +18003,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b cast->parts = parts; expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_xstring_node_closing_set(cast, &parser->previous); + pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; node = UP(cast); @@ -18091,7 +18012,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_list_free(&parts); expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_string_node_closing_set(cast, &parser->previous); + pm_interpolated_string_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; node = UP(cast); @@ -18227,11 +18148,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // At this point we can create a case node, though we don't yet know // if it is a case-in or 
case-when node. - pm_token_t end_keyword = not_provided(parser); pm_node_t *node; if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword); + pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL); pm_static_literals_t literals = { 0 }; // At this point we've seen a when keyword, so we know this is a @@ -18275,11 +18195,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); } if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { @@ -18301,7 +18221,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_static_literals_free(&literals); node = UP(case_node); } else { - pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword); + pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate); // If this is a case-match node (i.e., it is a pattern matching // case statement) then we must have a predicate. @@ -18346,12 +18266,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Now we need to check for the terminator of the in node's // pattern. It can be a newline or semicolon optionally // followed by a `then` keyword. 
- pm_token_t then_keyword; + pm_token_t then_keyword = { 0 }; if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { then_keyword = parser->previous; - } else { - then_keyword = not_provided(parser); } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); @@ -18369,7 +18287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Now that we have the full pattern and statements, we can // create the node and attach it to the case node. - pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword)); + pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword))); pm_case_match_node_condition_append(case_node, condition); } @@ -18404,9 +18322,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous); + pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous); } else { - pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous); + pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous); } pop_block_exits(parser, previous_block_exits); @@ -18436,8 +18354,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); - begin_node->base.location.end = parser->previous.end; - pm_begin_node_end_keyword_set(begin_node, &parser->previous); + PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous); + 
pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous); pop_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); @@ -18490,7 +18408,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Reject `foo && return bar`. if (!accepts_command_call && arguments.arguments != NULL) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type)); } } } @@ -18513,7 +18431,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } default: assert(false && "unreachable"); - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } case PM_TOKEN_KEYWORD_SUPER: { @@ -18524,7 +18442,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); if ( - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && arguments.arguments == NULL && ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE)) ) { @@ -18572,7 +18490,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_scope_push(parser, true); if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); } pm_node_t *statements = NULL; @@ -18609,7 +18527,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b 
pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); } - pm_token_t inheritance_operator; + pm_token_t inheritance_operator = { 0 }; pm_node_t *superclass; if (match1(parser, PM_TOKEN_LESS)) { @@ -18621,13 +18539,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); } else { - inheritance_operator = not_provided(parser); superclass = NULL; } pm_parser_scope_push(parser, true); - if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) { + if (inheritance_operator.start != NULL) { expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); } else { accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); @@ -18666,7 +18583,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pop_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); - return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous)); + return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); } case PM_TOKEN_KEYWORD_DEF: { pm_node_list_t current_block_exits = { 0 }; @@ -18676,7 +18593,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b size_t opening_newline_index = token_newline_index(parser); pm_node_t *receiver = NULL; - pm_token_t operator = not_provided(parser); + pm_token_t operator = { 0 }; pm_token_t name; // This context is necessary for lexing `...` in a bare params @@ -18710,7 +18627,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b operator = parser->previous; name = parse_method_definition_name(parser); } else { - pm_refute_numbered_parameter(parser, parser->previous.start, 
parser->previous.end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); pm_parser_scope_push(parser, true); name = parser->previous; @@ -18782,7 +18699,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b name = parse_method_definition_name(parser); } else { if (!valid_name) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); } name = identifier; @@ -18823,8 +18740,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } - pm_token_t lparen; - pm_token_t rparen; + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; pm_parameters_node_t *params; bool accept_endless_def = true; @@ -18844,9 +18761,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b context_pop(parser); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } rparen = parser->previous; @@ -18859,8 +18776,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); } - lparen = not_provided(parser); - rparen = not_provided(parser); params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1)); // Reject `def * = 1` and similar. 
We have to specifically check @@ -18871,18 +18786,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } default: { - lparen = not_provided(parser); - rparen = not_provided(parser); params = NULL; - context_pop(parser); break; } } pm_node_t *statements = NULL; - pm_token_t equal; - pm_token_t end_keyword; + pm_token_t equal = { 0 }; + pm_token_t end_keyword = { 0 }; if (accept1(parser, PM_TOKEN_EQUAL)) { if (token_is_setter_name(&name)) { @@ -18895,7 +18807,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS && parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS ) { - PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); } equal = parser->previous; @@ -18926,11 +18838,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); pm_do_loop_stack_pop(parser); context_pop(parser); - end_keyword = not_provided(parser); } else { - equal = not_provided(parser); - - if (lparen.type == PM_TOKEN_NOT_PROVIDED) { + if (lparen.start == NULL) { lex_state_set(parser, PM_LEX_STATE_BEG); parser->command_start = true; expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); @@ -18970,7 +18879,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b * methods to override the unary operators, we should ignore * the @ in the same way we do for symbols. 
*/ - pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name)); + pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name)); flush_block_exits(parser, previous_block_exits); pm_node_list_free(¤t_block_exits); @@ -18984,19 +18893,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b statements, &locals, &def_keyword, - &operator, - &lparen, - &rparen, - &equal, - &end_keyword + NTOK2PTR(operator), + NTOK2PTR(lparen), + NTOK2PTR(rparen), + NTOK2PTR(equal), + NTOK2PTR(end_keyword) )); } case PM_TOKEN_KEYWORD_DEFINED: { parser_lex(parser); - pm_token_t keyword = parser->previous; - pm_token_t lparen; - pm_token_t rparen; + pm_token_t keyword = parser->previous; + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; pm_node_t *expression; context_push(parser, PM_CONTEXT_DEFINED); @@ -19007,31 +18916,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); - lparen = not_provided(parser); - rparen = not_provided(parser); + lparen = (pm_token_t) { 0 }; } else { expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); - if (parser->recovering) { - rparen = not_provided(parser); - } else { + if (!parser->recovering) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); rparen = parser->previous; } } } else { - lparen = not_provided(parser); - rparen = not_provided(parser); expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); } context_pop(parser); return UP(pm_defined_node_create( parser, - &lparen, + NTOK2PTR(lparen), expression, - &rparen, + 
NTOK2PTR(rparen), &keyword )); } @@ -19080,7 +18984,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); } else { pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX); - index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end)); + index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword))); } // Now, if there are multiple index expressions, parse them out. @@ -19099,13 +19003,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1)); pm_do_loop_stack_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); } } @@ -19117,11 +19020,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword); - return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous)); + return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous)); } case PM_TOKEN_KEYWORD_IF: if 
(parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL); } size_t opening_newline_index = token_newline_index(parser); @@ -19171,13 +19074,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // syntax. if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) { - pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); } else { accept1(parser, PM_TOKEN_NEWLINE); pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER); } - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } accept1(parser, PM_TOKEN_NEWLINE); @@ -19188,13 +19091,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); } else { - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen); + arguments.opening_loc = TOK2LOC(parser, &lparen); receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); if (!parser->recovering) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); } } } else { @@ -19226,7 +19129,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pop_block_exits(parser, 
previous_block_exits); pm_node_list_free(¤t_block_exits); - pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing)); } @@ -19315,11 +19218,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE); } @@ -19334,7 +19236,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword); - return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); + return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_KEYWORD_WHILE: { size_t opening_newline_index = token_newline_index(parser); @@ -19349,11 +19251,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE); } @@ -19368,7 +19269,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b 
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword); - return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); + return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_PERCENT_LOWER_I: { parser_lex(parser); @@ -19383,27 +19284,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Interpolation is not possible but nested heredocs can still lead to // consecutive (disjoint) string tokens when the final newline is escaped. while (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - // Record the string node, moving to interpolation if needed. if (current == NULL) { - current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t bounds = not_provided(parser); - - pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; - pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped)); - pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, 
&opening, &parser->previous, &closing)); + pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length }; + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); parser_lex(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); pm_interpolated_symbol_node_append(interpolated, first_string); pm_interpolated_symbol_node_append(interpolated, second_string); @@ -19425,11 +19321,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } @@ -19459,20 +19355,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - if (current == NULL) { // If we hit content and the current node is NULL, then this is // the first string content we've seen. In that case we're going // to create a new string node and set that to the current. 
- current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { // If we hit string content and the current node is an // interpolated string, then we need to append the string content // to the list of child nodes. - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); parser_lex(parser); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); @@ -19481,14 +19374,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // then we need to convert the current node into an interpolated // string and add the string content to the list of child nodes. pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t bounds = not_provided(parser); - - pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; - pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped)); - pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing)); + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->start + cast->value_loc.start, + .end = parser->start + cast->value_loc.start + cast->value_loc.length + }; + + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); parser_lex(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + 
pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); pm_interpolated_symbol_node_append(interpolated, first_string); pm_interpolated_symbol_node_append(interpolated, second_string); @@ -19506,20 +19402,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we hit an embedded variable and the current node is NULL, // then this is the start of a new string. We'll set the current // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { // If we hit an embedded variable and the current node is a string // node, then we'll convert the current into an interpolated // string and add the string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; + PM_NODE_START_SET_NODE(interpolated, current); start_location_set = true; current = UP(interpolated); } else { @@ -19530,7 +19422,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); if (!start_location_set) { - current->location.start = part->location.start; + PM_NODE_START_SET_NODE(current, part); } break; } @@ -19540,21 +19432,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we hit an embedded expression and the current node is NULL, // then this is the start of a new string. We'll set the current // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { // If we hit an embedded expression and the current node is a // string node, then we'll convert the current into an // interpolated string and add the string node to the list of // parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; + PM_NODE_START_SET_NODE(interpolated, current); start_location_set = true; current = UP(interpolated); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { @@ -19567,7 +19455,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); if (!start_location_set) { - current->location.start = part->location.start; + PM_NODE_START_SET_NODE(current, part); } break; } @@ -19586,11 +19474,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } @@ -19607,10 +19495,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // Interpolation is not possible but nested heredocs can still lead to // consecutive (disjoint) string tokens when the final newline is escaped. 
while (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); // Record the string node, moving to interpolation if needed. if (current == NULL) { @@ -19618,7 +19503,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); pm_interpolated_string_node_append(interpolated, string); current = UP(interpolated); @@ -19639,12 +19524,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } case PM_TOKEN_PERCENT_UPPER_W: { @@ -19678,10 +19563,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b break; } case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = 
not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_flag_set(string, parse_unescaped_encoding(parser)); parser_lex(parser); @@ -19701,7 +19583,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // a string node, then we need to convert the // current node into an interpolated string and add // the string content to the list of child nodes. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); pm_interpolated_string_node_append(interpolated, string); current = UP(interpolated); @@ -19717,17 +19599,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // node is NULL, then this is the start of a new // string. We'll set the current node to a new // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { // If we hit an embedded variable and the current // node is a string node, then we'll convert the // current into an interpolated string and add the // string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); current = UP(interpolated); } else { @@ -19746,17 +19624,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // node is NULL, then this is the start of a new // string. We'll set the current node to a new // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing)); + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { // If we hit an embedded expression and the current // node is a string node, then we'll convert the // current into an interpolated string and add the // string node to the list of parts. 
- pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); pm_interpolated_string_node_append(interpolated, current); current = UP(interpolated); } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { @@ -19786,12 +19660,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); return UP(array); } case PM_TOKEN_REGEXP_BEGIN: { @@ -19850,10 +19724,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // a regular expression node with interpolation. 
interpolated = pm_interpolated_regular_expression_node_create(parser, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped)); - + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { // This is extremely strange, but the first string part of a // regular expression will always be tagged as binary if we @@ -19881,7 +19752,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM); } @@ -19934,10 +19805,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // create a string node with interpolation. 
node = pm_interpolated_xstring_node_create(parser, &opening, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped)); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_interpolated_xstring_node_append(node, part); @@ -19958,11 +19826,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM); } - pm_interpolated_xstring_node_closing_set(node, &closing); + pm_interpolated_xstring_node_closing_set(parser, node, &closing); return UP(node); } @@ -19974,7 +19842,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // still lex past it though and create a missing node place. 
if (binding_power != PM_BINDING_POWER_STATEMENT) { pm_parser_err_prefix(parser, diag_id); - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } pm_token_t operator = parser->previous; @@ -20084,13 +19952,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); break; } case PM_CASE_PARAMETER: { pm_accepts_block_stack_push(parser, false); - pm_token_t opening = not_provided(parser); - block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1)); + block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); break; } @@ -20178,17 +20045,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we get here, then we are assuming this token is closing a // parent context, so we'll indicate that to the user so that // they know how we behaved. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) { // We're going to make a special case here, because "cannot // parse expression" is pretty generic, and we know here that we // have an unexpected token. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); } else { pm_parser_err_prefix(parser, diag_id); } - return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } } @@ -20285,9 +20152,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) { single_value = false; - pm_token_t opening = not_provided(parser); - pm_array_node_t *array = pm_array_node_create(parser, &opening); - + pm_array_node_t *array = pm_array_node_create(parser, NULL); pm_array_node_elements_append(array, value); value = UP(array); @@ -20315,7 +20180,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding // but without parenthesis. 
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) { pm_call_node_t *call_node = (pm_call_node_t *) value; - if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) { + if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) { accepts_command_call_inner = true; } } @@ -20529,7 +20394,8 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { length = pm_buffer_length(&unescaped); } - pm_location_t location; + const uint8_t *start; + const uint8_t *end; pm_constant_id_t name; // If the name of the capture group isn't a valid identifier, we do @@ -20542,12 +20408,14 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { if (callback_data->shared) { // If the unescaped string is a slice of the source, then we can // copy the names directly. The pointers will line up. - location = (pm_location_t) { .start = source, .end = source + length }; - name = pm_parser_constant_id_location(parser, location.start, location.end); + start = source; + end = source + length; + name = pm_parser_constant_id_raw(parser, start, end); } else { // Otherwise, the name is a slice of the malloc-ed owned string, // in which case we need to copy it out into a new string. - location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end }; + start = parser->start + PM_NODE_START(call->receiver); + end = parser->start + PM_NODE_END(call->receiver); void *memory = xmalloc(length); if (memory == NULL) abort(); @@ -20572,7 +20440,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // If the identifier is not already a local, then we will add it to // the local table. 
- pm_parser_local_add(parser, name, location.start, location.end, 0); + pm_parser_local_add(parser, name, start, end, 0); } // Here we lazily create the MatchWriteNode since we know we're @@ -20583,7 +20451,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // Next, create the local variable target and add it to the list of // targets for the match. - pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth)); + pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth)); pm_node_list_append(&callback_data->match->targets, target); } @@ -20605,8 +20473,8 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t * parse_regular_expression_error_data_t error_data = { .parser = parser, - .start = call->receiver->location.start, - .end = call->receiver->location.end, + .start = parser->start + PM_NODE_START(call->receiver), + .end = parser->start + PM_NODE_END(call->receiver), .shared = content->type == PM_STRING_SHARED }; @@ -20634,7 +20502,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // is parsed because it could be referenced in the value. pm_call_node_t *call_node = (pm_call_node_t *) node; if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0); + pm_parser_local_add_location(parser, &call_node->message_loc, 0); } } PRISM_FALLTHROUGH @@ -20643,7 +20511,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // variable before parsing the value, in case the value // references the variable. 
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { - pm_parser_local_add_location(parser, node->location.start, node->location.end, 0); + pm_parser_local_add_location(parser, &node->location, 0); } parser_lex(parser); @@ -20747,8 +20615,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start); pm_node_unreference(parser, node); } @@ -20768,10 +20636,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); @@ -20881,8 +20747,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); pm_node_unreference(parser, node); } @@ -20902,10 +20768,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); @@ -21025,8 +20889,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); pm_node_unreference(parser, node); } @@ -21047,10 +20911,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. 
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0)); @@ -21088,7 +20950,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // In this case we have an operator but we don't know what it's for. // We need to treat it as an error. For now, we'll mark it as an error // and just skip right past it. 
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); return node; } } @@ -21199,21 +21061,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } @@ -21229,7 +21091,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_TOKEN_LESS: case PM_TOKEN_LESS_EQUAL: { if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, 
PM_CALL_NODE_FLAGS_COMPARISON)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); } parser_lex(parser); @@ -21252,21 +21114,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); } break; } @@ -21287,8 +21149,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t break; } default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); - message = (pm_token_t) { .type = 
PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); + message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } } @@ -21298,7 +21160,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t if ( (previous_binding_power == PM_BINDING_POWER_STATEMENT) && arguments.arguments == NULL && - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && match1(parser, PM_TOKEN_COMMA) ) { return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -21364,8 +21226,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // before the `expect` function call to make sure it doesn't // accidentally move past a ':' token that occurs after the syntax // error. - pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end)); + pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon))); context_pop(parser); pop_block_exits(parser, previous_block_exits); @@ -21470,7 +21332,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t parser_lex(parser); pm_arguments_t arguments = { 0 }; - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.opening_loc = TOK2LOC(parser, &parser->previous); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { pm_accepts_block_stack_push(parser, true); @@ -21479,7 +21341,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t expect1(parser, 
PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET); } - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); // If we have a comma after the closing bracket then this is a multiple // assignment and we should parse the targets. @@ -21564,7 +21426,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t static inline bool pm_call_node_command_p(const pm_call_node_t *node) { return ( - (node->opening_loc.start == NULL) && + (node->opening_loc.length == 0) && (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && (node->arguments != NULL || node->block != NULL) ); @@ -21582,7 +21444,7 @@ static pm_node_t * parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) { if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) { pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP); - return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); + return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth); @@ -21618,7 +21480,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // If we have a symbol node that is being parsed as a label, then we // need to immediately return, because there should never be an // infix operator following this node. - if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { return node; } break; @@ -21683,7 +21545,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // If this is a non-assoc operator and we are about to parse the // exact same operator, then we need to add an error. 
if (match1(parser, current_token_type)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); break; } @@ -21696,7 +21558,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) { if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); break; } @@ -21723,22 +21585,22 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc if ( // (1) foo[1] !( - cast->call_operator_loc.start == NULL && - cast->message_loc.start != NULL && - cast->message_loc.start[0] == '[' && - cast->message_loc.end[-1] == ']' + cast->call_operator_loc.length == 0 && + cast->message_loc.length > 0 && + parser->start[cast->message_loc.start] == '[' && + parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']' ) && // (2) foo.bar !( - cast->call_operator_loc.start != NULL && + cast->call_operator_loc.length > 0 && cast->arguments == NULL && cast->block == NULL && - cast->opening_loc.start == NULL + cast->opening_loc.length == 0 ) && // (3) foo.bar(1) !( - cast->call_operator_loc.start != NULL && - cast->opening_loc.start != NULL + cast->call_operator_loc.length > 0 && + cast->opening_loc.length > 0 ) && // (4) foo.bar do end !( @@ -21821,7 +21683,7 @@ 
wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) { pm_keyword_hash_node_elements_append(keywords, UP(pm_assoc_node_create( parser, UP(pm_symbol_node_synthesized_create(parser, "chomp")), - &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }, + NULL, UP(pm_true_node_synthesized_create(parser)) ))); @@ -21887,7 +21749,7 @@ parse_program(pm_parser_t *parser) { // correct the location information. if (statements == NULL) { statements = pm_statements_node_create(parser); - pm_statements_node_location_set(statements, parser->start, parser->start); + statements->base.location = (pm_location_t) { 0 }; } return UP(pm_program_node_create(parser, &locals, statements)); @@ -21928,7 +21790,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) { static void pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) { if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') { - pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN); + pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN); } } #endif @@ -21986,7 +21848,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .current = { .type = PM_TOKEN_EOF, .start = source, .end = source }, .next_start = NULL, .heredoc_end = NULL, - .data_loc = { .start = NULL, .end = NULL }, + .data_loc = { 0 }, .comment_list = { 0 }, .magic_comment_list = { 0 }, .warning_list = { 0 }, @@ -22041,7 +21903,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // guess at the number of newlines that we'll need based on the size of the // input. size_t newline_size = size / 22; - pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size); + pm_newline_list_init(&parser->newline_list, newline_size < 4 ? 
4 : newline_size); // If options were provided to this parse, establish them here. if (options != NULL) { @@ -22180,7 +22042,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm const uint8_t *newline = next_newline(cursor, parser->end - cursor); while (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_newline_list_append(&parser->newline_list, U32(newline - parser->start + 1)); cursor = newline + 1; newline = next_newline(cursor, parser->end - cursor); @@ -22209,7 +22071,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; } else { - pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND); + pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND); pm_newline_list_clear(&parser->newline_list); } } @@ -22506,7 +22368,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_serialize_header(buffer); pm_serialize_encoding(parser.encoding, buffer); pm_buffer_append_varsint(buffer, parser.start_line); - pm_serialize_comment_list(&parser, &parser.comment_list, buffer); + pm_serialize_comment_list(&parser.comment_list, buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); diff --git a/prism/prism.h b/prism/prism.h index c468db18bef3c2..c1ce5829976074 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -143,11 +143,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void /** * Serialize the given list of comments to the given buffer. * - * @param parser The parser to serialize. * @param list The list of comments to serialize. * @param buffer The buffer to serialize to. 
*/ -void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); +void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer); /** * Serialize the name of the encoding to the buffer. diff --git a/prism/static_literals.c b/prism/static_literals.c index 9fa37b999a9e46..13a52378dda802 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -9,6 +9,9 @@ typedef struct { /** The list of newline offsets to use to calculate line numbers. */ const pm_newline_list_t *newline_list; + /** The start of the source being parsed. */ + const uint8_t *start; + /** The line number that the parser starts on. */ int32_t start_line; @@ -353,7 +356,7 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter * Add a node to the set of static literals. */ pm_node_t * -pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) { +pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: case PM_SOURCE_LINE_NODE: @@ -361,6 +364,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->integer_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -373,6 +377,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->float_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -386,6 +391,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->number_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name 
= NULL }, @@ -399,6 +405,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->string_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -411,6 +418,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->regexp_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -423,6 +431,7 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line &literals->symbol_nodes, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = NULL }, @@ -502,12 +511,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met const double value = ((const pm_float_node_t *) node)->value; if (PRISM_ISINF(value)) { - if (*node->location.start == '-') { + if (metadata->start[node->location.start] == '-') { pm_buffer_append_byte(buffer, '-'); } pm_buffer_append_string(buffer, "Infinity", 8); } else if (value == 0.0) { - if (*node->location.start == '-') { + if (metadata->start[node->location.start] == '-') { pm_buffer_append_byte(buffer, '-'); } pm_buffer_append_string(buffer, "0.0", 3); @@ -604,11 +613,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met * Create a string-based representation of the given static literal. 
*/ void -pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) { +pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node) { pm_static_literal_inspect_node( buffer, &(pm_static_literals_metadata_t) { .newline_list = newline_list, + .start = start, .start_line = start_line, .encoding_name = encoding_name }, diff --git a/prism/static_literals.h b/prism/static_literals.h index bd29761899c29c..0f8eb43bfa5623 100644 --- a/prism/static_literals.h +++ b/prism/static_literals.h @@ -92,13 +92,14 @@ typedef struct { * Add a node to the set of static literals. * * @param newline_list The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. * @param start_line The line number that the parser starts on. * @param literals The set of static literals to add the node to. * @param node The node to add to the set. * @param replace Whether to replace the previous node if one already exists. * @return A pointer to the node that is being overwritten, if there is one. */ -pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); +pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace); /** * Free the internal memory associated with the given static literals set. @@ -112,10 +113,11 @@ void pm_static_literals_free(pm_static_literals_t *literals); * * @param buffer The buffer to write the string to. * @param newline_list The list of newline offsets to use to calculate lines. + * @param start The start of the source being parsed. * @param start_line The line number that the parser starts on. 
* @param encoding_name The name of the encoding of the source being parsed. * @param node The node to create a string representation of. */ -void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node); +void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node); #endif diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb index 23af8886a7364f..e9c3742085369f 100644 --- a/prism/templates/ext/prism/api_node.c.erb +++ b/prism/templates/ext/prism/api_node.c.erb @@ -12,17 +12,12 @@ static VALUE rb_cPrism<%= node.name %>; <%- end -%> static VALUE -pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) { +pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) { if (freeze) { - VALUE location_argv[] = { - source, - LONG2FIX(start - parser->start), - LONG2FIX(end - start) - }; - + VALUE location_argv[] = { source, LONG2FIX(start), LONG2FIX(length) }; return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation)); } else { - uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start))); + uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length)); return ULL2NUM(value); } } @@ -30,7 +25,7 @@ pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t * VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) { ID type = rb_intern(pm_token_type_name(token->type)); - VALUE location = pm_location_new(parser, token->start, token->end, source, freeze); + VALUE location = pm_location_new((uint32_t) (token->start - parser->start), (uint32_t) (token->end - token->start), source, freeze); VALUE 
slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding); if (freeze) rb_obj_freeze(slice); @@ -200,7 +195,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi argv[1] = ULONG2NUM(node->node_id); // location - argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze); + argv[2] = pm_location_new(node->location.start, node->location.length, source, freeze); // flags argv[3] = ULONG2NUM(node->flags); @@ -237,10 +232,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi if (freeze) rb_obj_freeze(argv[<%= index %>]); <%- when Prism::Template::LocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); + argv[<%= index %>] = pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze); <%- when Prism::Template::OptionalLocationField -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" - argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze); + argv[<%= index %>] = cast-><%= field.name %>.length == 0 ? 
Qnil : pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze); <%- when Prism::Template::UInt8Field -%> #line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>" argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>); diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb index 790cf9ebb8ade1..9115f20eaae766 100644 --- a/prism/templates/include/prism/ast.h.erb +++ b/prism/templates/include/prism/ast.h.erb @@ -46,15 +46,19 @@ typedef struct { } pm_token_t; /** - * This represents a range of bytes in the source string to which a node or - * token corresponds. + * This struct represents a slice in the source code, defined by an offset and + * a length. Note that we have confirmation that we can represent all locations + * within Ruby source files using 32-bit integers per: + * + * https://bugs.ruby-lang.org/issues/20488#note-1 + * */ typedef struct { - /** A pointer to the start location of the range in the source. */ - const uint8_t *start; + /** The offset of the location from the start of the source. */ + uint32_t start; - /** A pointer to the end location of the range in the source. */ - const uint8_t *end; + /** The length of the location. */ + uint32_t length; } pm_location_t; struct pm_node; @@ -112,7 +116,7 @@ static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2; typedef struct pm_node { /** * This represents the type of the node. It somewhat maps to the nodes that - * existed in the original grammar and ripper, but it's not a 1:1 mapping. + * existed in the original grammar and ripper, but it is not a 1:1 mapping. */ pm_node_type_t type; @@ -129,7 +133,7 @@ typedef struct pm_node { uint32_t node_id; /** - * This is the location of the node in the source. It's a range of bytes + * This is the location of the node in the source. It is a range of bytes * containing a start and an end. 
*/ pm_location_t location; @@ -160,6 +164,15 @@ typedef struct pm_node { * Return true if the given flag is set on the given node. */ #define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0) + +/** + * The alignment required for a child node within a parent node. + */ +#ifdef _MSC_VER +#define PM_NODE_ALIGNAS __declspec(align(8)) +#else +#define PM_NODE_ALIGNAS PRISM_ALIGNAS(PRISM_ALIGNOF(void *)) +#endif <%- nodes.each do |node| -%> /** @@ -182,7 +195,6 @@ typedef struct pm_node { typedef struct pm_<%= node.human %> { /** The embedded base node. */ pm_node_t base; - <%- node.fields.each do |field| -%> /** @@ -195,7 +207,7 @@ typedef struct pm_<%= node.human %> { <%- end -%> */ <%= case field - when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}" + when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "PM_NODE_ALIGNAS struct #{field.c_type} *#{field.name}" when Prism::Template::NodeListField then "struct pm_node_list #{field.name}" when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}" when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}" diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb index 07bbc8fae79264..c1864e602139e3 100644 --- a/prism/templates/include/prism/diagnostic.h.erb +++ b/prism/templates/include/prism/diagnostic.h.erb @@ -100,25 +100,25 @@ const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id); * memory for its message. * * @param list The list to append to. - * @param start The start of the diagnostic. - * @param end The end of the diagnostic. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. * @return Whether the diagnostic was successfully appended. 
*/ -bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id); +bool pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id); /** * Append a diagnostic to the given list of diagnostics that is using a format * string for its message. * * @param list The list to append to. - * @param start The start of the diagnostic. - * @param end The end of the diagnostic. + * @param start The source offset of the start of the diagnostic. + * @param length The length of the diagnostic. * @param diag_id The diagnostic ID. * @param ... The arguments to the format string for the message. * @return Whether the diagnostic was successfully appended. */ -bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...); +bool pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...); /** * Deallocate the internal state of the given diagnostic list. diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index 121dd4b2b652f7..88f8525f8008fb 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -447,12 +447,12 @@ pm_diagnostic_level(pm_diagnostic_id_t diag_id) { * Append an error to the given list of diagnostic. 
*/ bool -pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { +pm_diagnostic_list_append(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t)); if (diagnostic == NULL) return false; *diagnostic = (pm_diagnostic_t) { - .location = { start, end }, + .location = { .start = start, .length = length }, .diag_id = diag_id, .message = pm_diagnostic_message(diag_id), .owned = false, @@ -468,7 +468,7 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t * * string for its message. */ bool -pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) { +pm_diagnostic_list_append_format(pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) { va_list arguments; va_start(arguments, diag_id); @@ -485,19 +485,19 @@ pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const ui return false; } - size_t length = (size_t) (result + 1); - char *message = (char *) xmalloc(length); + size_t message_length = (size_t) (result + 1); + char *message = (char *) xmalloc(message_length); if (message == NULL) { xfree(diagnostic); return false; } va_start(arguments, diag_id); - vsnprintf(message, length, format, arguments); + vsnprintf(message, message_length, format, arguments); va_end(arguments); *diagnostic = (pm_diagnostic_t) { - .location = { start, end }, + .location = { .start = start, .length = length }, .diag_id = diag_id, .message = message, .owned = true, diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb index 2357e552000bc3..f1709a0249c3d7 100644 --- a/prism/templates/src/node.c.erb +++ b/prism/templates/src/node.c.erb @@ -226,10 +226,8 @@ pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constan } static void 
-pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) { - uint32_t start = (uint32_t) (location->start - parser->start); - uint32_t end = (uint32_t) (location->end - parser->start); - pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end); +pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) { + pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); } /** @@ -243,7 +241,7 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>); const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node; - pm_dump_json_location(buffer, parser, &cast->base.location); + pm_dump_json_location(buffer, &cast->base.location); <%- [*node.flags, *node.fields].each_with_index do |field, index| -%> // Dump the <%= field.name %> field @@ -290,10 +288,10 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no } pm_buffer_append_byte(buffer, ']'); <%- when Prism::Template::LocationField -%> - pm_dump_json_location(buffer, parser, &cast-><%= field.name %>); + pm_dump_json_location(buffer, &cast-><%= field.name %>); <%- when Prism::Template::OptionalLocationField -%> - if (cast-><%= field.name %>.start != NULL) { - pm_dump_json_location(buffer, parser, &cast-><%= field.name %>); + if (cast-><%= field.name %>.length != 0) { + pm_dump_json_location(buffer, &cast-><%= field.name %>); } else { pm_buffer_append_string(buffer, "null", 4); } diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb index 639c2fecf33ba3..74c0f6dbdf8478 100644 --- a/prism/templates/src/prettyprint.c.erb +++ b/prism/templates/src/prettyprint.c.erb @@ -13,7 +13,7 @@ void pm_prettyprint(void) {} static inline void prettyprint_location(pm_buffer_t 
*output_buffer, const pm_parser_t *parser, const pm_location_t *location) { pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line); - pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line); + pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->start + location->length, parser->start_line); pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column); } @@ -106,17 +106,17 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm pm_buffer_append_byte(output_buffer, ' '); prettyprint_location(output_buffer, parser, location); pm_buffer_append_string(output_buffer, " = \"", 4); - pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY); pm_buffer_append_string(output_buffer, "\"\n", 2); <%- when Prism::Template::OptionalLocationField -%> pm_location_t *location = &cast-><%= field.name %>; - if (location->start == NULL) { + if (location->length == 0) { pm_buffer_append_string(output_buffer, " nil\n", 5); } else { pm_buffer_append_byte(output_buffer, ' '); prettyprint_location(output_buffer, parser, location); pm_buffer_append_string(output_buffer, " = \"", 4); - pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY); + pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY); pm_buffer_append_string(output_buffer, "\"\n", 2); } <%- when Prism::Template::UInt8Field -%> diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index 0f0aace445a680..958b0fd7cf075c 100644 
--- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -20,13 +20,9 @@ pm_sizet_to_u32(size_t value) { } static void -pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) { - assert(location->start); - assert(location->end); - assert(location->start <= location->end); - - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->start - parser->start)); - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->end - location->start)); +pm_serialize_location(const pm_location_t *location, pm_buffer_t *buffer) { + pm_buffer_append_varuint(buffer, location->start); + pm_buffer_append_varuint(buffer, location->length); } static void @@ -77,7 +73,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { <%- if Prism::Template::INCLUDE_NODE_ID -%> pm_buffer_append_varuint(buffer, node->node_id); <%- end -%> - pm_serialize_location(parser, &node->location, buffer); + pm_serialize_location(&node->location, buffer); switch (PM_NODE_TYPE(node)) { // We do not need to serialize a ScopeNode ever as @@ -123,15 +119,15 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { } <%- when Prism::Template::LocationField -%> <%- if field.should_be_serialized? -%> - pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); + pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); <%- end -%> <%- when Prism::Template::OptionalLocationField -%> <%- if field.should_be_serialized? 
-%> - if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) { + if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) { pm_buffer_append_byte(buffer, 0); } else { pm_buffer_append_byte(buffer, 1); - pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); + pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); } <%- end -%> <%- when Prism::Template::UInt8Field -%> @@ -169,60 +165,60 @@ pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) { } static void -pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) { +pm_serialize_comment(pm_comment_t *comment, pm_buffer_t *buffer) { // serialize type pm_buffer_append_byte(buffer, (uint8_t) comment->type); // serialize location - pm_serialize_location(parser, &comment->location, buffer); + pm_serialize_location(&comment->location, buffer); } /** * Serialize the given list of comments to the given buffer. 
*/ void -pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_comment_t *comment; for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) { - pm_serialize_comment(parser, comment, buffer); + pm_serialize_comment(comment, buffer); } } static void -pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) { +pm_serialize_magic_comment(pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) { // serialize key location - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start)); - pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->key_length)); + pm_buffer_append_varuint(buffer, magic_comment->key.start); + pm_buffer_append_varuint(buffer, magic_comment->key.length); // serialize value location - pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start)); - pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->value_length)); + pm_buffer_append_varuint(buffer, magic_comment->value.start); + pm_buffer_append_varuint(buffer, magic_comment->value.length); } static void -pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_magic_comment_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_magic_comment_t *magic_comment; for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { - pm_serialize_magic_comment(parser, magic_comment, buffer); + pm_serialize_magic_comment(magic_comment, buffer); } } static void pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) { - if (parser->data_loc.end == 
NULL) { + if (parser->data_loc.length == 0) { pm_buffer_append_byte(buffer, 0); } else { pm_buffer_append_byte(buffer, 1); - pm_serialize_location(parser, &parser->data_loc, buffer); + pm_serialize_location(&parser->data_loc, buffer); } } static void -pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) { +pm_serialize_diagnostic(pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) { // serialize the type pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id); @@ -232,18 +228,18 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf pm_buffer_append_string(buffer, diagnostic->message, message_length); // serialize location - pm_serialize_location(parser, &diagnostic->location, buffer); + pm_serialize_location(&diagnostic->location, buffer); pm_buffer_append_byte(buffer, diagnostic->level); } static void -pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { +pm_serialize_diagnostic_list(pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); pm_diagnostic_t *diagnostic; for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) { - pm_serialize_diagnostic(parser, diagnostic, buffer); + pm_serialize_diagnostic(diagnostic, buffer); } } @@ -263,12 +259,12 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_buffer_append_varsint(buffer, parser->start_line); pm_serialize_newline_list(&parser->newline_list, buffer); <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> - pm_serialize_comment_list(parser, &parser->comment_list, buffer); + pm_serialize_comment_list(&parser->comment_list, buffer); <%- end -%> - pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer); + pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer); pm_serialize_data_loc(parser, buffer); - 
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer); - pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer); + pm_serialize_diagnostic_list(&parser->error_list, buffer); + pm_serialize_diagnostic_list(&parser->warning_list, buffer); } #line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>" diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb index f196393ee12661..5c6f2713100f91 100644 --- a/prism/templates/src/token_type.c.erb +++ b/prism/templates/src/token_type.c.erb @@ -31,10 +31,6 @@ pm_token_type_human(pm_token_type_t token_type) { switch (token_type) { case PM_TOKEN_EOF: return "end-of-input"; - case PM_TOKEN_MISSING: - return "missing token"; - case PM_TOKEN_NOT_PROVIDED: - return "not provided token"; case PM_TOKEN_AMPERSAND: return "'&'"; case PM_TOKEN_AMPERSAND_AMPERSAND: diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c index a51dc11645ff63..748582b7fe10ae 100644 --- a/prism/util/pm_char.c +++ b/prism/util/pm_char.c @@ -83,15 +83,15 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { * searching past the given maximum number of characters. 
*/ size_t -pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) { +pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset) { if (length <= 0) return 0; - size_t size = 0; - size_t maximum = (size_t) length; + uint32_t size = 0; + uint32_t maximum = (uint32_t) length; while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) { if (string[size] == '\n') { - pm_newline_list_append(newline_list, string + size); + pm_newline_list_append(newline_list, start_offset + size + 1); } size++; diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h index deeafd632177da..b213e8edeefbe8 100644 --- a/prism/util/pm_char.h +++ b/prism/util/pm_char.h @@ -31,10 +31,12 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); * @param string The string to search. * @param length The maximum number of characters to search. * @param newline_list The list of newlines to populate. + * @param start_offset The offset at which the string occurs in the source, for + * the purpose of tracking newlines. * @return The number of characters at the start of the string that are * whitespace. */ -size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list); +size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset); /** * Returns the number of characters at the start of the string that are inline diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c index 8331618f54ff20..89c294a6d777e0 100644 --- a/prism/util/pm_newline_list.c +++ b/prism/util/pm_newline_list.c @@ -5,12 +5,10 @@ * allocation of the offsets succeeds, otherwise returns false. 
*/ bool -pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) { - list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t)); +pm_newline_list_init(pm_newline_list_t *list, size_t capacity) { + list->offsets = (uint32_t *) xcalloc(capacity, sizeof(uint32_t)); if (list->offsets == NULL) return false; - list->start = start; - // This is 1 instead of 0 because we want to include the first line of the // file as having offset 0, which is set because of calloc. list->size = 1; @@ -32,24 +30,20 @@ pm_newline_list_clear(pm_newline_list_t *list) { * the offsets succeeds (if one was necessary), otherwise returns false. */ bool -pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { +pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor) { if (list->size == list->capacity) { - size_t *original_offsets = list->offsets; + uint32_t *original_offsets = list->offsets; list->capacity = (list->capacity * 3) / 2; - list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t)); + list->offsets = (uint32_t *) xcalloc(list->capacity, sizeof(uint32_t)); if (list->offsets == NULL) return false; - memcpy(list->offsets, original_offsets, list->size * sizeof(size_t)); + memcpy(list->offsets, original_offsets, list->size * sizeof(uint32_t)); xfree(original_offsets); } - assert(*cursor == '\n'); - assert(cursor >= list->start); - size_t newline_offset = (size_t) (cursor - list->start + 1); - - assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); - list->offsets[list->size++] = newline_offset; + assert(list->size == 0 || cursor > list->offsets[list->size - 1]); + list->offsets[list->size++] = cursor; return true; } @@ -59,21 +53,18 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { * line of the closest offset less than the given offset is returned. 
*/ int32_t -pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - +pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line) { size_t left = 0; size_t right = list->size - 1; while (left <= right) { size_t mid = left + (right - left) / 2; - if (list->offsets[mid] == offset) { + if (list->offsets[mid] == cursor) { return ((int32_t) mid) + start_line; } - if (list->offsets[mid] < offset) { + if (list->offsets[mid] < cursor) { left = mid + 1; } else { right = mid - 1; @@ -89,21 +80,18 @@ pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32 * are returned. */ pm_line_column_t -pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - +pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line) { size_t left = 0; size_t right = list->size - 1; while (left <= right) { size_t mid = left + (right - left) / 2; - if (list->offsets[mid] == offset) { + if (list->offsets[mid] == cursor) { return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 }); } - if (list->offsets[mid] < offset) { + if (list->offsets[mid] < cursor) { left = mid + 1; } else { right = mid - 1; @@ -112,7 +100,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor return ((pm_line_column_t) { .line = ((int32_t) left) + start_line - 1, - .column = (uint32_t) (offset - list->offsets[left - 1]) + .column = cursor - list->offsets[left - 1] }); } diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h index 406abe8ba59062..b27c031de871b7 100644 --- a/prism/util/pm_newline_list.h +++ b/prism/util/pm_newline_list.h @@ -26,9 +26,6 @@ * sorted/inserted in ascending order. 
*/ typedef struct { - /** A pointer to the start of the source string. */ - const uint8_t *start; - /** The number of offsets in the list. */ size_t size; @@ -36,7 +33,7 @@ typedef struct { size_t capacity; /** The list of offsets. */ - size_t *offsets; + uint32_t *offsets; } pm_newline_list_t; /** @@ -55,41 +52,39 @@ typedef struct { * allocation of the offsets succeeds, otherwise returns false. * * @param list The list to initialize. - * @param start A pointer to the start of the source string. * @param capacity The initial capacity of the list. * @return True if the allocation of the offsets succeeds, otherwise false. */ -bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); +bool pm_newline_list_init(pm_newline_list_t *list, size_t capacity); /** * Clear out the newlines that have been appended to the list. * * @param list The list to clear. */ -void -pm_newline_list_clear(pm_newline_list_t *list); +void pm_newline_list_clear(pm_newline_list_t *list); /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. * * @param list The list to append to. - * @param cursor A pointer to the offset to append. + * @param cursor The offset to append. * @return True if the reallocation of the offsets succeeds (if one was * necessary), otherwise false. */ -bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor); +bool pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor); /** * Returns the line of the given offset. If the offset is not in the list, the * line of the closest offset less than the given offset is returned. * * @param list The list to search. - * @param cursor A pointer to the offset to search for. + * @param cursor The offset to search for. * @param start_line The line to start counting from. * @return The line of the given offset. 
*/ -int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); +int32_t pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line); /** * Returns the line and column of the given offset. If the offset is not in the @@ -97,11 +92,11 @@ int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *curso * are returned. * * @param list The list to search. - * @param cursor A pointer to the offset to search for. + * @param cursor The offset to search for. * @param start_line The line to start counting from. * @return The line and column of the given offset. */ -pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); +pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line); /** * Free the internal memory allocated for the newline list. diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c index 916a4cc3fd3c16..60c67b29831344 100644 --- a/prism/util/pm_strpbrk.c +++ b/prism/util/pm_strpbrk.c @@ -4,22 +4,22 @@ * Add an invalid multibyte character error to the parser. */ static inline void -pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start); +pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) { + pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_INVALID_MULTIBYTE_CHARACTER, parser->start[start]); } /** * Set the explicit encoding for the parser to the current encoding. 
*/ static inline void -pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) { +pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t length) { if (parser->explicit_encoding != NULL) { if (parser->explicit_encoding == parser->encoding) { // Okay, we already locked to this encoding. } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { // Not okay, we already found a Unicode escape sequence and this // conflicts. - pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name); + pm_diagnostic_list_append_format(&parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name); } else { // Should not be anything else. assert(false && "unreachable"); @@ -61,7 +61,7 @@ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *chars index++; } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } @@ -81,7 +81,7 @@ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t return source + index; } - if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1); + if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), 1); index++; } @@ -105,7 +105,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } else { size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width); + if (validate) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), (uint32_t) width); if (width > 0) { 
index += width; @@ -122,7 +122,7 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } @@ -148,7 +148,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } else { size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - pm_strpbrk_explicit_encoding_set(parser, source, width); + pm_strpbrk_explicit_encoding_set(parser, (uint32_t) (source - parser->start), (uint32_t) width); if (width > 0) { index += width; @@ -163,7 +163,7 @@ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t index++; } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); + pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start)); } } } diff --git a/prism_compile.c b/prism_compile.c index 788968113090cd..f3aae95487c869 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -144,7 +144,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) #define PM_NODE_END_LOCATION(parser, node) \ - ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) + ((pm_node_location_t) { .line = 
pm_newline_list_line(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, (parser)->start_line), .node_id = ((const pm_node_t *) (node))->node_id }) #define PM_LOCATION_START_LOCATION(parser, location, id) \ ((pm_node_location_t) { .line = pm_newline_list_line(&(parser)->newline_list, (location)->start, (parser)->start_line), .node_id = id }) @@ -153,7 +153,7 @@ pm_iseq_add_setlocal(rb_iseq_t *iseq, LINK_ANCHOR *const seq, int line, int node pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start, (parser)->start_line) #define PM_NODE_END_LINE_COLUMN(parser, node) \ - pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.end, (parser)->start_line) + pm_newline_list_line_column(&(parser)->newline_list, ((const pm_node_t *) (node))->location.start + ((const pm_node_t *) (node))->location.length, (parser)->start_line) #define PM_LOCATION_START_LINE_COLUMN(parser, location) \ pm_newline_list_line_column(&(parser)->newline_list, (location)->start, (parser)->start_line) @@ -3231,7 +3231,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ scope->base.type = PM_SCOPE_NODE; scope->base.location.start = node->location.start; - scope->base.location.end = node->location.end; + scope->base.location.length = node->location.length; scope->previous = previous; scope->ast_node = (pm_node_t *) node; @@ -3272,7 +3272,7 @@ pm_scope_node_init(const pm_node_t *node, pm_scope_node_t *scope, pm_scope_node_ if (cast->statements != NULL) { scope->base.location.start = cast->statements->base.location.start; - scope->base.location.end = cast->statements->base.location.end; + scope->base.location.length = cast->statements->base.location.length; } break; @@ -3652,7 +3652,7 @@ static void pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *const ret, bool popped, pm_scope_node_t *scope_node, 
ID method_id, LABEL *start) { const pm_location_t *message_loc = &call_node->message_loc; - if (message_loc->start == NULL) message_loc = &call_node->base.location; + if (message_loc->length == 0) message_loc = &call_node->base.location; const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, call_node->base.node_id); @@ -3666,16 +3666,34 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { if (PM_BRANCH_COVERAGE_P(iseq)) { - const uint8_t *cursors[3] = { - call_node->closing_loc.end, - call_node->arguments == NULL ? NULL : call_node->arguments->base.location.end, - call_node->message_loc.end - }; + uint32_t end_cursor; + bool end_found = false; + + if (call_node->closing_loc.length > 0) { + uint32_t cursor = call_node->closing_loc.start + call_node->closing_loc.length; + end_cursor = cursor; + end_found = true; + } + + if (call_node->arguments != NULL) { + uint32_t cursor = call_node->arguments->base.location.start + call_node->arguments->base.location.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } - const uint8_t *end_cursor = cursors[0]; - end_cursor = (end_cursor == NULL || cursors[1] == NULL) ? cursors[1] : (end_cursor > cursors[1] ? end_cursor : cursors[1]); - end_cursor = (end_cursor == NULL || cursors[2] == NULL) ? cursors[2] : (end_cursor > cursors[2] ? 
end_cursor : cursors[2]); - if (!end_cursor) end_cursor = call_node->closing_loc.end; + if (call_node->message_loc.length > 0) { + uint32_t cursor = call_node->message_loc.start + call_node->message_loc.length; + if (!end_found || cursor > end_cursor) { + end_cursor = cursor; + end_found = true; + } + } + + if (!end_found) { + end_cursor = call_node->closing_loc.start + call_node->closing_loc.length; + } const pm_line_column_t start_location = PM_NODE_START_LINE_COLUMN(scope_node->parser, call_node); const pm_line_column_t end_location = pm_newline_list_line_column(&scope_node->parser->newline_list, end_cursor, scope_node->parser->start_line); @@ -3822,9 +3840,9 @@ pm_compile_call(rb_iseq_t *iseq, const pm_call_node_t *call_node, LINK_ANCHOR *c * node. */ static inline VALUE -pm_compile_back_reference_ref(const pm_back_reference_read_node_t *node) +pm_compile_back_reference_ref(const pm_scope_node_t *scope_node, const pm_back_reference_read_node_t *node) { - const char *type = (const char *) (node->base.location.start + 1); + const char *type = (const char *) (scope_node->parser->start + node->base.location.start + 1); // Since a back reference is `$`, Ruby represents the ID as an // rb_intern on the value after the `$`. 
@@ -4215,7 +4233,7 @@ pm_compile_defined_expr0(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l // defined?($+) // ^^ const pm_back_reference_read_node_t *cast = (const pm_back_reference_read_node_t *) node; - VALUE ref = pm_compile_back_reference_ref(cast); + VALUE ref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN(ret, location, putnil); PUSH_INSN3(ret, location, defined, INT2FIX(DEFINED_REF), ref, PUSH_VAL(DEFINED_GVAR)); @@ -7066,13 +7084,13 @@ pm_compile_alias_global_variable_node(rb_iseq_t *iseq, const pm_alias_global_var { const pm_location_t *name_loc = &node->new_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (scope_node->parser->start + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } { const pm_location_t *name_loc = &node->old_name->location; - VALUE operand = ID2SYM(rb_intern3((const char *) name_loc->start, name_loc->end - name_loc->start, scope_node->encoding)); + VALUE operand = ID2SYM(rb_intern3((const char *) (scope_node->parser->start + name_loc->start), name_loc->length, scope_node->encoding)); PUSH_INSN1(ret, *location, putobject, operand); } @@ -7351,7 +7369,7 @@ pm_compile_call_node(rb_iseq_t *iseq, const pm_call_node_t *node, LINK_ANCHOR *c ID method_id = pm_constant_id_lookup(scope_node, node->name); const pm_location_t *message_loc = &node->message_loc; - if (message_loc->start == NULL) message_loc = &node->base.location; + if (message_loc->length == 0) message_loc = &node->base.location; const pm_node_location_t location = PM_LOCATION_START_LOCATION(scope_node->parser, message_loc, node->base.node_id); const char *builtin_func; @@ -8719,7 +8737,7 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // ^^ if (!popped) { const pm_back_reference_read_node_t *cast = (const 
pm_back_reference_read_node_t *) node; - VALUE backref = pm_compile_back_reference_ref(cast); + VALUE backref = pm_compile_back_reference_ref(scope_node, cast); PUSH_INSN2(ret, location, getspecial, INT2FIX(1), backref); } @@ -10578,7 +10596,7 @@ pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_li for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != finish; error = (pm_diagnostic_t *) error->node.next) { pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line); - pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line); + pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.start + error->location.length, start_line); // We're going to insert this error into the array in sorted order. We // do this by finding the first error that has a line number greater @@ -10942,7 +10960,7 @@ static bool pm_parse_process_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location) { const size_t start_line = pm_newline_list_line_column(&parser->newline_list, location->start, 1).line; - const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->end, 1).line; + const size_t end_line = pm_newline_list_line_column(&parser->newline_list, location->start + location->length, 1).line; const uint8_t *start = parser->start + parser->newline_list.offsets[start_line - 1]; const uint8_t *end = ((end_line == parser->newline_list.size) ? parser->end : (parser->start + parser->newline_list.offsets[end_line])); diff --git a/ruby.c b/ruby.c index 28f43176d61d70..cd5c8d1d15d66e 100644 --- a/ruby.c +++ b/ruby.c @@ -2200,7 +2200,7 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) // If we found an __END__ marker, then we're going to define a global // DATA constant that is a file object that can be read to read the // contents after the marker. 
- if (NIL_P(error) && result->parser.data_loc.start != NULL) { + if (NIL_P(error) && result->parser.data_loc.length != 0) { rb_define_global_const("DATA", rb_stdin); } } @@ -2237,17 +2237,17 @@ prism_script(ruby_cmdline_options_t *opt, pm_parse_result_t *result) // If we found an __END__ marker, then we're going to define a global // DATA constant that is a file object that can be read to read the // contents after the marker. - if (NIL_P(error) && result->parser.data_loc.start != NULL) { + if (NIL_P(error) && result->parser.data_loc.length != 0) { int xflag = opt->xflag; VALUE file = open_load_file(script_name, &xflag); const pm_parser_t *parser = &result->parser; - size_t offset = parser->data_loc.start - parser->start + 7; + uint32_t offset = parser->data_loc.start + 7; if ((parser->start + offset < parser->end) && parser->start[offset] == '\r') offset++; if ((parser->start + offset < parser->end) && parser->start[offset] == '\n') offset++; - rb_funcall(file, rb_intern_const("seek"), 2, SIZET2NUM(offset), INT2FIX(SEEK_SET)); + rb_funcall(file, rb_intern_const("seek"), 2, UINT2NUM(offset), INT2FIX(SEEK_SET)); rb_define_global_const("DATA", file); } } diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb index aa264ae5b7a08f..cbe8b06ad659f4 100644 --- a/test/prism/errors_test.rb +++ b/test/prism/errors_test.rb @@ -45,19 +45,19 @@ def test_embdoc_ending def test_unterminated_string_closing statement = Prism.parse_statement("'hello") assert_equal statement.unescaped, "hello" - assert_empty statement.closing + assert_nil statement.closing end def test_unterminated_interpolated_string_closing statement = Prism.parse_statement('"hello') assert_equal statement.unescaped, "hello" - assert_empty statement.closing + assert_nil statement.closing end def test_unterminated_empty_string_closing statement = Prism.parse_statement('"') assert_empty statement.unescaped - assert_empty statement.closing + assert_nil statement.closing end def test_invalid_message_name @@ 
-84,7 +84,7 @@ def test_regexp_encoding_option_mismatch_error def test_incomplete_def_closing_loc statement = Prism.parse_statement("def f; 123") - assert_empty(statement.end_keyword) + assert_nil(statement.end_keyword) end private diff --git a/test/prism/result/overlap_test.rb b/test/prism/result/overlap_test.rb index 155bc870d36f74..d605eeca44f64d 100644 --- a/test/prism/result/overlap_test.rb +++ b/test/prism/result/overlap_test.rb @@ -33,8 +33,13 @@ def assert_overlap(fixture) queue << child if compare - assert_operator current.location.start_offset, :<=, child.location.start_offset - assert_operator current.location.end_offset, :>=, child.location.end_offset + assert_operator current.location.start_offset, :<=, child.location.start_offset, -> { + "[#{fixture.full_path}] Parent node #{current.class} at #{current.location} does not start before child node #{child.class} at #{child.location}" + } + + assert_operator current.location.end_offset, :>=, child.location.end_offset, -> { + "[#{fixture.full_path}] Parent node #{current.class} at #{current.location} does not end after child node #{child.class} at #{child.location}" + } end end end diff --git a/test/prism/result/source_location_test.rb b/test/prism/result/source_location_test.rb index 38b971d02b8321..993150f58127d8 100644 --- a/test/prism/result/source_location_test.rb +++ b/test/prism/result/source_location_test.rb @@ -935,16 +935,16 @@ def assert_location(kind, source, expected = 0...source.length, **options) node = yield node if block_given? 
if expected.begin == 0 - assert_equal 0, node.location.start_column + assert_equal 0, node.location.start_column, "#{kind} start_column" end if expected.end == source.length - assert_equal source.split("\n").last.length, node.location.end_column + assert_equal source.split("\n").last.length, node.location.end_column, "#{kind} end_column" end assert_kind_of kind, node - assert_equal expected.begin, node.location.start_offset - assert_equal expected.end, node.location.end_offset + assert_equal expected.begin, node.location.start_offset, "#{kind} start_offset" + assert_equal expected.end, node.location.end_offset, "#{kind} end_offset" end end end From 6687167b435bbdf6a92df7057638144d947c8331 Mon Sep 17 00:00:00 2001 From: git Date: Tue, 27 Jan 2026 20:31:17 +0000 Subject: [PATCH 29/77] Update default gems list at ec154654a99c07d065108e9c31793e [ci skip] --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6f8ac408e52e4b..38ca8667a87ecb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ releases. 
* RubyGems 4.1.0.dev * bundler 4.1.0.dev -* prism 1.8.0 +* prism 1.9.0 * stringio 3.2.1.dev * strscan 3.1.7.dev * syntax_suggest 2.0.3 From 1cd32536a56adc81e3a0f861ad43e5aaf8457a28 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 27 Jan 2026 15:42:24 -0500 Subject: [PATCH 30/77] [ruby/prism] Mark STATES as a private constant https://github.com/ruby/prism/commit/c78f742581 --- lib/prism/lex_compat.rb | 6 +++--- lib/prism/translation/ripper/lexer.rb | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 4960230bcf1499..e69f4109dc16ab 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -639,7 +639,7 @@ def result event = RIPPER.fetch(token.type) value = token.value - lex_state = Translation::Ripper::Lexer::State.cached(lex_state) + lex_state = Translation::Ripper::Lexer::State[lex_state] token = case event @@ -691,7 +691,7 @@ def result counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Translation::Ripper::Lexer::State.cached(result_value[current_index][1]) + Translation::Ripper::Lexer::State[result_value[current_index][1]] else previous_state end @@ -828,7 +828,7 @@ def result def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token) new_tokens = [] - prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG) + prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] prev_token_end = bom ? 
3 : 0 tokens.each do |token| diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb index bed863af081b79..cbcdcd47cc3af0 100644 --- a/lib/prism/translation/ripper/lexer.rb +++ b/lib/prism/translation/ripper/lexer.rb @@ -9,7 +9,6 @@ class Ripper class Lexer < Ripper # :nodoc: # :stopdoc: class State - attr_reader :to_int, :to_s def initialize(i) @@ -39,10 +38,12 @@ def allbits?(i) to_int.allbits?(i) end def anybits?(i) to_int.anybits?(i) end def nobits?(i) to_int.nobits?(i) end - # Instances are frozen and there are only a handful of them so we cache them here. - STATES = Hash.new { |h,k| h[k] = State.new(k) } + # Instances are frozen and there are only a handful of them so we + # cache them here. + STATES = Hash.new { |hash, key| hash[key] = State.new(key) } + private_constant :STATES - def self.cached(i) + def self.[](i) STATES[i] end end @@ -54,7 +55,7 @@ def initialize(pos, event, tok, state, message = nil) @pos = pos @event = event @tok = tok - @state = State.cached(state) + @state = State[state] @message = message end From 5c15f9380f9782750ee223140108950a5262075e Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Tue, 27 Jan 2026 21:20:48 +0100 Subject: [PATCH 31/77] [ruby/prism] Use the terminology "column in bytes/characters/code units" * Consistent and clear. * Avoids the confusion that "column number" might be understood as a column in an editor starting at 1 (they all start at 0). 
https://github.com/ruby/prism/commit/91f1c4b9d5 --- lib/prism/parse_result.rb | 20 ++++++++++---------- lib/prism/translation/ripper.rb | 2 +- prism/templates/lib/prism/dot_visitor.rb.erb | 2 +- prism/util/pm_newline_list.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index be1c13f97c917d..2498ae7e145fed 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -103,7 +103,7 @@ def line_end(byte_offset) offsets[find_line(byte_offset) + 1] || source.bytesize end - # Return the column number for the given byte offset. + # Return the column in bytes for the given byte offset. def column(byte_offset) byte_offset - line_start(byte_offset) end @@ -113,7 +113,7 @@ def character_offset(byte_offset) (source.byteslice(0, byte_offset) or raise).length end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. def character_column(byte_offset) character_offset(byte_offset) - character_offset(line_start(byte_offset)) end @@ -146,7 +146,7 @@ def code_units_cache(encoding) CodeUnitsCache.new(source, encoding) end - # Returns the column number in code units for the given encoding for the + # Returns the column in code units for the given encoding for the # given byte offset. def code_units_column(byte_offset, encoding) code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) @@ -253,7 +253,7 @@ def character_offset(byte_offset) byte_offset end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. def character_column(byte_offset) byte_offset - line_start(byte_offset) end @@ -428,19 +428,19 @@ def end_line source.line(end_offset) end - # The column number in bytes where this location starts from the start of + # The column in bytes where this location starts from the start of # the line. 
def start_column source.column(start_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location starts from the start of # the line. def start_character_column source.character_column(start_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # starts from the start of the line. def start_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(start_offset, encoding) @@ -452,19 +452,19 @@ def cached_start_code_units_column(cache) cache[start_offset] - cache[source.line_start(start_offset)] end - # The column number in bytes where this location ends from the start of the + # The column in bytes where this location ends from the start of the # line. def end_column source.column(end_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. def end_character_column source.character_column(end_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # ends from the start of the line. def end_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(end_offset, encoding) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 70f72132f64bd9..ccce226d7def48 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -475,7 +475,7 @@ def self.lex_state_name(state) # The current line number of the parser. attr_reader :lineno - # The current column number of the parser. + # The current column in bytes of the parser. attr_reader :column # Create a new Translation::Ripper object with the given source.
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb index cd2998fe61b9d6..87de1965b083b0 100644 --- a/prism/templates/lib/prism/dot_visitor.rb.erb +++ b/prism/templates/lib/prism/dot_visitor.rb.erb @@ -169,7 +169,7 @@ module Prism "Node_#{node.object_id}" end - # Inspect a location to display the start and end line and column numbers. + # Inspect a location to display the start and end line and columns in bytes. def location_inspect(location) "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})" end diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h index b27c031de871b7..dd3e625089ba3e 100644 --- a/prism/util/pm_newline_list.h +++ b/prism/util/pm_newline_list.h @@ -43,7 +43,7 @@ typedef struct { /** The line number. */ int32_t line; - /** The column number. */ + /** The column in bytes. */ uint32_t column; } pm_line_column_t; From d5616094f4625cc4ba96c28c3c84967af742a1dc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 27 Jan 2026 15:50:58 -0500 Subject: [PATCH 32/77] [ruby/prism] Mark insert_on_sp as private https://github.com/ruby/prism/commit/db72066cde --- lib/prism/lex_compat.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index e69f4109dc16ab..c23adda2412a99 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -820,12 +820,14 @@ def result end # Add :on_sp tokens - tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token) + tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token) Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source) end - def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token) + private + + def insert_on_sp(tokens, source, data_loc, bom, eof_token) new_tokens = [] prev_token_state = 
Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] From 5eb17ea4aae32d657c1816de64acd3ef7abb454f Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 27 Jan 2026 17:07:32 -0500 Subject: [PATCH 33/77] [DOC] Fix hash style in Hash#flatten --- hash.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hash.c b/hash.c index e116eb8ab6f026..83a55913fa7ce4 100644 --- a/hash.c +++ b/hash.c @@ -4492,21 +4492,21 @@ flatten_i(VALUE key, VALUE val, VALUE ary) * Examples; note that entry foo: {bar: 1, baz: 2} is never flattened. * * h = {foo: {bar: 1, baz: 2}, bat: [:bam, [:bap, [:bah]]]} - * h.flatten(1) # => [:foo, {:bar=>1, :baz=>2}, :bat, [:bam, [:bap, [:bah]]]] - * h.flatten(2) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, [:bap, [:bah]]] - * h.flatten(3) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, [:bah]] - * h.flatten(4) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] - * h.flatten(5) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] + * h.flatten(1) # => [:foo, {bar: 1, baz: 2}, :bat, [:bam, [:bap, [:bah]]]] + * h.flatten(2) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, [:bap, [:bah]]] + * h.flatten(3) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, [:bah]] + * h.flatten(4) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] + * h.flatten(5) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] * * With negative integer +depth+, * flattens all levels: * - * h.flatten(-1) # => [:foo, {:bar=>1, :baz=>2}, :bat, :bam, :bap, :bah] + * h.flatten(-1) # => [:foo, {bar: 1, baz: 2}, :bat, :bam, :bap, :bah] * * With +depth+ zero, * returns the equivalent of #to_a: * - * h.flatten(0) # => [[:foo, {:bar=>1, :baz=>2}], [:bat, [:bam, [:bap, [:bah]]]]] + * h.flatten(0) # => [[:foo, {bar: 1, baz: 2}], [:bat, [:bam, [:bap, [:bah]]]]] * * Related: see {Methods for Converting}[rdoc-ref:Hash@Methods+for+Converting]. 
*/ From bea48adbcacc29cce9536977e15ceba0d65c8a02 Mon Sep 17 00:00:00 2001 From: Kuba Suder Date: Wed, 28 Jan 2026 04:48:37 +0200 Subject: [PATCH 34/77] [DOC] fixed pid shown in the doc comment on fork --- process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process.c b/process.c index 19f3172cb82ec6..006611d525e68f 100644 --- a/process.c +++ b/process.c @@ -4233,7 +4233,7 @@ rb_proc__fork(VALUE _obj) * puts "Before the fork: #{Process.pid}" * fork do * puts "In the child process: #{Process.pid}" - * end # => 382141 + * end # => 420520 * puts "After the fork: #{Process.pid}" * * Output: From 3b22e32fa50c2c18663be87dad4d11a266954773 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Jan 2026 08:44:13 -0500 Subject: [PATCH 35/77] [ruby/prism] Use align keywords instead of the header OpenBSD is advertising to the preprocessor that it supports C11 but does not include the stdalign.h header. We do not actually need the header, since we can just use the keywords. https://github.com/ruby/prism/commit/b3e2708fff --- prism/defines.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/prism/defines.h b/prism/defines.h index c41e6031a3fefc..f6bd1dbe40edec 100644 --- a/prism/defines.h +++ b/prism/defines.h @@ -263,13 +263,11 @@ * specify alignment in a compiler-agnostic way. */ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */ - #include <stdalign.h> - /** Specify alignment for a type or variable. */ - #define PRISM_ALIGNAS(size) alignas(size) + #define PRISM_ALIGNAS _Alignas /** Get the alignment requirement of a type. */ - #define PRISM_ALIGNOF(type) alignof(type) + #define PRISM_ALIGNOF _Alignof #elif defined(__GNUC__) || defined(__clang__) /** Specify alignment for a type or variable.
*/ #define PRISM_ALIGNAS(size) __attribute__((aligned(size))) From 01ace0655ed84708f0afdcc74fb779e680bfc4e0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Jan 2026 09:12:51 -0500 Subject: [PATCH 36/77] [ruby/prism] Remove tokens from lex compat Instead of having custom classes, use arrays and track which tokens we should ignore the state for in the test. https://github.com/ruby/prism/commit/a333b56ada --- lib/prism/lex_compat.rb | 150 ++++++++------------------------ lib/prism/translation/ripper.rb | 2 +- test/prism/ruby/ripper_test.rb | 14 +-- 3 files changed, 46 insertions(+), 120 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index c23adda2412a99..4c516a9de0acb9 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -196,57 +196,6 @@ def deconstruct_keys(keys) "__END__": :on___end__ }.freeze - # When we produce tokens, we produce the same arrays that Ripper does. - # However, we add a couple of convenience methods onto them to make them a - # little easier to work with. We delegate all other methods to the array. - class Token < BasicObject - # Create a new token object with the given ripper-compatible array. - def initialize(array) - @array = array - end - - # The location of the token in the source. - def location - @array[0] - end - - # The type of the token. - def event - @array[1] - end - - # The slice of the source that this token represents. - def value - @array[2] - end - - # The state of the lexer when this token was produced. - def state - @array[3] - end - - # We want to pretend that this is just an Array. - def ==(other) # :nodoc: - @array == other - end - - def respond_to_missing?(name, include_private = false) # :nodoc: - @array.respond_to?(name, include_private) - end - - def method_missing(name, ...) # :nodoc: - @array.send(name, ...) 
- end - end - - # Tokens where state should be ignored - # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) # :nodoc: - self[0...-1] == other[0...-1] - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. @@ -290,7 +239,7 @@ def to_a embexpr_balance = 0 tokens.each_with_object([]) do |token, results| #$ Array[Token] - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -305,9 +254,9 @@ def to_a if split # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind # to keep the delimiter in the result. - token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += value.count("\n") end else @@ -350,7 +299,7 @@ def initialize # whitespace on plain string content tokens. This allows us to later # remove that amount of whitespace from the beginning of each line. def <<(token) - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg @embexpr_balance += 1 @dedent = 0 if @dedent_next && @ended_on_newline @@ -358,7 +307,7 @@ def <<(token) @embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - line = token.value + line = token[2] if dedent_next && !(line.strip.empty? 
&& line.end_with?("\n")) leading = line[/\A(\s*)\n?/, 1] @@ -381,7 +330,7 @@ def <<(token) end end - @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 + @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0 @ended_on_newline = false tokens << token end @@ -394,7 +343,7 @@ def to_a embexpr_balance = 0 tokens.each do |token| - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 results << token @@ -406,9 +355,9 @@ def to_a lineno = token[0][0] column = token[0][1] - token.value.split(/(?<=\n)/).each_with_index do |value, index| + token[2].split(/(?<=\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += 1 end else @@ -436,15 +385,15 @@ def to_a results << token index += 1 - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 when :on_embexpr_end, :on_heredoc_end embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/) - token.value << tokens[index].value + while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/) + token[2] << tokens[index][2] index += 1 end end @@ -467,7 +416,7 @@ def to_a # whitespace calculation we performed above. This is because # checking if the subsequent token needs to be dedented is common to # both the dedent calculation and the ignored_sp insertion. - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -479,7 +428,7 @@ def to_a # Here we're going to split the string on newlines, but maintain # the newlines in the resulting array. We'll do that with a look # behind assertion. 
- splits = token.value.split(/(?<=\n)/) + splits = token[2].split(/(?<=\n)/) index = 0 while index < splits.length @@ -536,12 +485,12 @@ def to_a ignored = deleted_chars.join line.delete_prefix!(ignored) - results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) + results << [[lineno, 0], :on_ignored_sp, ignored, token[3]] column = ignored.length end end - results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? + results << [[lineno, column], token[1], line, token[3]] unless line.empty? index += 1 end else @@ -552,7 +501,7 @@ def to_a end dedent_next = - ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && + ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) && embexpr_balance == 0 end @@ -563,11 +512,11 @@ def to_a # Here we will split between the two types of heredocs and return the # object that will store their tokens. def self.build(opening) - case opening.value[2] + case opening[2][2] when "~" DedentingHeredoc.new when "-" - DashHeredoc.new(opening.value[3] != "'") + DashHeredoc.new(opening[2][3] != "'") else PlainHeredoc.new end @@ -647,16 +596,16 @@ def result # Ripper doesn't include the rest of the token in the event, so we need to # trim it down to just the content on the first line. value = value[0..value.index("\n")] - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_heredoc_end # Heredoc end tokens can be emitted in an odd order, so we don't # want to bother comparing the state on them. 
last_heredoc_end = token.location.end_offset - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_words_sep # Ripper emits one token each per line. value.each_line.with_index do |line, index| @@ -664,7 +613,7 @@ def result lineno += 1 column = 0 end - tokens << Token.new([[lineno, column], event, line, lex_state]) + tokens << [[lineno, column], event, line, lex_state] end tokens.pop when :on_regexp_end @@ -696,7 +645,7 @@ def result previous_state end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_eof eof_token = token previous_token = result_value[index - 1][0] @@ -721,13 +670,13 @@ def result end_offset += 3 end - tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]) + tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state] end end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] else - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] end previous_state = lex_state @@ -813,9 +762,8 @@ def result tokens = tokens[0...-1] # We sort by location because Ripper.lex sorts. - # Manually implemented instead of `sort_by!(&:location)` for performance. tokens.sort_by! do |token| - line, column = token.location + line, column = token[0] source.byte_offset(line, column) end @@ -834,7 +782,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token) prev_token_end = bom ? 
3 : 0 tokens.each do |token| - line, column = token.location + line, column = token[0] start_offset = source.byte_offset(line, column) # Ripper reports columns on line 1 without counting the BOM, so we @@ -858,50 +806,28 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token) continuation = sp_value[continuation_index...next_whitespace_index] second_whitespace = sp_value[next_whitespace_index..] - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column], - :on_sp, - first_whitespace, - prev_token_state - ]) unless first_whitespace.empty? - - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column + continuation_index], - :on_sp, - continuation, - prev_token_state - ]) - - new_tokens << IgnoreStateToken.new([ - [sp_line + 1, 0], - :on_sp, - second_whitespace, - prev_token_state - ]) unless second_whitespace.empty? + new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? + new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] + new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty? 
else - new_tokens << IgnoreStateToken.new([ - [sp_line, sp_column], - :on_sp, - sp_value, - prev_token_state - ]) + new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state] end end new_tokens << token - prev_token_state = token.state - prev_token_end = start_offset + token.value.bytesize + prev_token_state = token[3] + prev_token_end = start_offset + token[2].bytesize end unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl end_offset = eof_token.location.end_offset if prev_token_end < end_offset - new_tokens << IgnoreStateToken.new([ + new_tokens << [ [source.line(prev_token_end), source.column(prev_token_end)], :on_sp, source.slice(prev_token_end, end_offset - prev_token_end), prev_token_state - ]) + ] end end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index ccce226d7def48..054ad88ce3e8a3 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -88,7 +88,7 @@ def self.lex(src, filename = "-", lineno = 1, raise_errors: false) # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] # def self.tokenize(...) 
- lex(...).map(&:value) + lex(...).map { |token| token[2] } end # This contains a table of all of the parser events and their diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index c8d259135f47cc..a89a9503b98fd4 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -136,7 +136,7 @@ def test_lexer assert_equal(expected, lexer.parse[0].to_a) assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a) - assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event)) + assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map { |token| token[1] }) assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end @@ -169,13 +169,13 @@ def assert_ripper_lex(source) # Prism emits tokens by their order in the code, not in parse order ripper.sort_by! { |elem| elem[0] } - [prism.size, ripper.size].max.times do |i| - expected = ripper[i] - actual = prism[i] + [prism.size, ripper.size].max.times do |index| + expected = ripper[index] + actual = prism[index] - # Since tokens related to heredocs are not emitted in the same order, - # the state also doesn't line up. - if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end + # There are some tokens that have slightly different state that do not + # affect the parse tree, so they may not match. + if expected && actual && expected[1] == actual[1] && %i[on_comment on_heredoc_end on_embexpr_end on_sp].include?(expected[1]) expected[3] = actual[3] = nil end From 8d41e57efe2e985e9496e91d63ba25ef0df2399b Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 28 Jan 2026 12:32:59 -0500 Subject: [PATCH 37/77] Revert "Prevent starvation when acquiring mutex over and over (#15877)" (#15990) This reverts commit 994257ab06072df38de024e70a60aa9a87e36089. I saw some failures in CI that are probably related to the change.
Example: ``` 1) Failure: TestMonitor#test_timedwait [/Users/runner/work/ruby/ruby/src/test/monitor/test_monitor.rb:282]: ``` This starvation problem has not been an issue in real apps afaik, so for now it's best to revert it and think of a better solution. --- test/ruby/test_thread.rb | 35 ----------------------------------- thread_sync.c | 24 ++---------------------- 2 files changed, 2 insertions(+), 57 deletions(-) diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index 47a8e94c07c008..b2d8e73693807c 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1664,39 +1664,4 @@ def test_mn_threads_sub_millisecond_sleep assert_operator elapsed, :>=, 0.1, "sub-millisecond sleeps should not return immediately" end; end - - # [Bug #21840] - def test_mutex_owner_doesnt_starve_waiters - assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}") - begin; - require "tempfile" - temp = Tempfile.new("temp") - m = Mutex.new - - def fib(n) - return n if n <= 1 - fib(n - 1) + fib(n - 2) - end - - t1_running = false - Thread.new do - t1_running = true - loop do - fib(20) - m.synchronize do - File.open(temp.path) { } # reset timeslice due to blocking operation - end - end - end - - loop until t1_running - - 3.times.map do - Thread.new do - m.synchronize do - end - end - end.each(&:join) - end; - end end diff --git a/thread_sync.c b/thread_sync.c index 8b86c903809c8d..e3916c97cbd0a6 100644 --- a/thread_sync.c +++ b/thread_sync.c @@ -10,8 +10,6 @@ typedef struct rb_mutex_struct { rb_thread_t *th; // even if the fiber is collected, we might need access to the thread in mutex_free struct rb_mutex_struct *next_mutex; struct ccan_list_head waitq; /* protected by GVL */ - uint32_t saved_running_time_us; - bool wait_waking; // Is there a thread waiting to be woken up by this mutex? Reset during every wakeup. 
} rb_mutex_t; /* sync_waiter is always on-stack */ @@ -214,15 +212,8 @@ mutex_locked(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) static inline bool do_mutex_trylock(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) { - // NOTE: we can successfully lock a mutex even if there are other threads waiting on it. First one to it wins. if (mutex->ec_serial == 0) { RUBY_DEBUG_LOG("%p ok", mutex); - if (mutex->wait_waking) { - // If we acquired `mutex` without contention and before the thread that was popped off the waitq, we're going - // to set our running_time back to what it was here during mutex unlock if it got reset during our critical - // section. This is to prevent starvation of other threads waiting on the mutex. - mutex->saved_running_time_us = th->running_time_us; - } mutex_locked(mutex, th, ec_serial); return true; @@ -359,8 +350,7 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p) } ccan_list_del(&sync_waiter.node); - // If mutex->ec_serial != 0, the mutex was locked by another thread before we had the chance to acquire it. - // We'll put ourselves on the waitq and sleep again. 
+ // unlocked by another thread while sleeping if (!mutex->ec_serial) { mutex_set_owner(mutex, th, ec_serial); } @@ -401,7 +391,6 @@ do_mutex_lock(struct mutex_args *args, int interruptible_p) if (saved_ints) th->ec->interrupt_flag = saved_ints; if (mutex->ec_serial == ec_serial) mutex_locked(mutex, th, ec_serial); - mutex->wait_waking = false; } RUBY_DEBUG_LOG("%p locked", mutex); @@ -465,15 +454,6 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) struct sync_waiter *cur = 0, *next; - - if (mutex->wait_waking && ec_serial) { - uint32_t saved = mutex->saved_running_time_us; - if (th->running_time_us < saved) { - th->running_time_us = saved; - } - } - - mutex->saved_running_time_us = 0; mutex->ec_serial = 0; thread_mutex_remove(th, mutex); @@ -489,7 +469,6 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) case THREAD_RUNNABLE: /* from someone else calling Thread#run */ case THREAD_STOPPED_FOREVER: /* likely (rb_mutex_lock) */ RUBY_DEBUG_LOG("wakeup th:%u", rb_th_serial(cur->th)); - mutex->wait_waking = true; rb_threadptr_interrupt(cur->th); return NULL; case THREAD_STOPPED: /* probably impossible */ @@ -501,6 +480,7 @@ rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_serial_t ec_serial) } } } + // We did not find any threads to wake up, so we can just return with no error: return NULL; } From 72f11c8bfaedb4260fe9d29b2f9bd26d83b8c18a Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Wed, 28 Jan 2026 15:37:14 -0500 Subject: [PATCH 38/77] Attempt to fix failing monitor test (#15992) --- test/monitor/test_monitor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/monitor/test_monitor.rb b/test/monitor/test_monitor.rb index 4c55afca6c869e..7a26831bafe2ba 100644 --- a/test/monitor/test_monitor.rb +++ b/test/monitor/test_monitor.rb @@ -274,7 +274,7 @@ def test_timedwait @monitor.synchronize do queue2.enq(nil) assert_equal("foo", b) - result2 = cond.wait(0.1) + result2 = 
cond.wait(10) assert_equal(true, result2) assert_equal("bar", b) end From 2d5460e4d78032584dbf261259a8b05b856dffdf Mon Sep 17 00:00:00 2001 From: Godfrey Chan Date: Wed, 28 Jan 2026 13:32:33 -0800 Subject: [PATCH 39/77] ZJIT: Optimize send-with-block to iseq methods (#15911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ~~Two commits:~~ ### zjit: Optimize send-with-block to iseq methods This commit enables JIT-to-JIT calls for send instructions with literal blocks (e.g., `foo { |x| x * 2 }`), rather than falling back to the `rb_vm_send` C wrapper. This optimization applies to both methods with explicit block parameters (`def foo(&block)`) and methods implicitly accepting a block (`def foo; yield if block_given?; end`). Prior to this change, any callsite with a block would miss out on the JIT-to-JIT fast path and go through a `rb_vm_send` C wrapper call. Initially, as https://github.com/Shopify/ruby/issues/931 suggested, we assumed this would involve changes to the JIT-to-JIT calling convention to accommodate passing a block argument. However, during implementation, I discovered that @nirvdrum had already wired up the `specval` protocol used by the interpreter in their `invokesuper` work (https://github.com/ruby/ruby/pull/887). That infrastructure remained dormant but was exactly what we needed here. After plumbing everything through, it Just Worked™. It may be possible to design a more direct JIT-to-JIT protocol for passing blocks. In the HIR for `def foo(&block)`, the BB for the JIT entrypoint already takes two arguments (self + &block, presumably), and since `yield` is a keyword, it may be possible to rewrite the implicit case to be explicit (thanks @tenderlove for the idea), and do "better" than passing via `specval`. I'm not sure if that's a goal eventually, but in any case, if `specval` works, there is no harm in enabling this optimization today.
Implementation notes: This initial pass largely duplicates the existing `SendWithoutBlock` to `SendWithoutBlockDirect` specialization logic. A future refactor could potentially collapse Send and SendWithoutBlock into a single instruction variant (with `blockiseq: Option<IseqPtr>`, you can always pattern match the Option if needed), since they now follow very similar paths. However, I wanted to keep this PR focused and also get feedback on that direction first before committing to such a big refactor. The optimization currently handles `VM_METHOD_TYPE_ISEQ` only. It does not handle "block to block" `VM_METHOD_TYPE_BMETHOD`. It's unclear if that'd be all that difficult, I just didn't try. Happy to do it as a follow-up. Any callsites not handled by this specialization continue to fall through to the existing rb_vm_send harness safely. Test coverage includes both explicit block parameters and yield-based methods. Thanks to @tenderlove for initial ideas and walkthrough, and @nirvdrum for the foundation this builds on. Closes https://github.com/Shopify/ruby/issues/931 ### ~~zjit: Allow SendWithoutBlockDirect to def f(&blk)~~ Saving this for another time ### Follow-ups * [ ] Refactor and simplify by merging `hir::Insn::Send` and `hir::Insn::SendWithoutBlock`. (Pending feedback/approval.) * [ ] Handle block-to-block `VM_METHOD_TYPE_BMETHOD` calls. It appears that `gen_send_iseq_direct` already attempted to handle it. * [ ] As far as I can tell, we should be able to just enable `super { ... }` too, happy to do that as a follow-up if @nirvdrum doesn't have time for it.
--- zjit/src/codegen.rs | 48 +++++++--- zjit/src/hir.rs | 188 +++++++++++++++++++++++++++----------- zjit/src/hir/opt_tests.rs | 122 ++++++++++++++----------- zjit/src/hir/tests.rs | 8 +- zjit/src/stats.rs | 8 +- 5 files changed, 248 insertions(+), 126 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index a77bd7debd0dfc..5d6060dd49d3a3 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -478,8 +478,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::Snapshot { .. } => return Ok(()), // we don't need to do anything for this instruction at the moment &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. } => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), + Insn::SendDirect { cme, iseq, recv, args, kw_bits, blockiseq, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), *blockiseq), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), - Insn::SendWithoutBlockDirect { cme, iseq, recv, args, kw_bits, state, .. } => gen_send_iseq_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), *kw_bits, &function.frame_state(*state), None), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeSuperForward { cd, blockiseq, state, reason, .. } => gen_invokesuperforward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. 
} => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), @@ -1024,6 +1024,15 @@ fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, name: ID, recv: Opnd, args: asm.ccall(cfunc, cfunc_args) } +// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). +// VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler. +// rb_captured_block->code.iseq aliases with cfp->block_code. +fn gen_block_handler_specval(asm: &mut Assembler, blockiseq: IseqPtr) -> lir::Opnd { + asm.store(Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(blockiseq).into()); + let cfp_self_addr = asm.lea(Opnd::mem(VALUE_BITS, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self_addr, Opnd::Imm(1)) +} + /// Generate code for a variadic C function call /// func(int argc, VALUE *argv, VALUE recv) fn gen_ccall_variadic( @@ -1053,13 +1062,8 @@ fn gen_ccall_variadic( gen_spill_stack(jit, asm, state); gen_spill_locals(jit, asm, state); - let block_handler_specval = if let Some(block_iseq) = blockiseq { - // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). - // VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler. - // rb_captured_block->code.iseq aliases with cfp->block_code. 
- asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); - let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); - asm.or(cfp_self_addr, Opnd::Imm(1)) + let block_handler_specval = if let Some(blockiseq) = blockiseq { + gen_block_handler_specval(asm, blockiseq) } else { VM_BLOCK_HANDLER_NONE.into() }; @@ -1446,7 +1450,7 @@ fn gen_send_iseq_direct( args: Vec, kw_bits: u32, state: &FrameState, - block_handler: Option, + blockiseq: Option, ) -> lir::Opnd { gen_incr_counter(asm, Counter::iseq_optimized_send_count); @@ -1462,6 +1466,12 @@ fn gen_send_iseq_direct( gen_spill_locals(jit, asm, state); gen_spill_stack(jit, asm, state); + // This mirrors vm_caller_setup_arg_block() for the `blockiseq != NULL` case. + // The HIR specialization guards ensure we will only reach here for literal blocks, + // not &block forwarding, &:foo, etc. Those are rejected in `type_specialize` by + // `unspecializable_call_type`. + let block_handler = blockiseq.map(|b| gen_block_handler_specval(asm, b)); + let (frame_type, specval) = if VM_METHOD_TYPE_BMETHOD == unsafe { get_cme_def_type(cme) } { // Extract EP from the Proc instance let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; @@ -1513,11 +1523,25 @@ fn gen_send_iseq_direct( asm.mov(CFP, new_cfp); asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP as i32), CFP); + let params = unsafe { iseq.params() }; + + // For &block, the JIT entrypoint expects the block_handler as an argument. + // This HIR param is not actually used; specval is read from the VM frame today. + // TODO: Remove unused param from HIR, or pass specval through c_args. 
+ // See https://github.com/ruby/ruby/pull/15911#discussion_r2710544982 + let needs_block = params.flags.has_block() != 0; + // Set up arguments - let mut c_args = vec![recv]; + let mut c_args = Vec::with_capacity({ + // This is a heuristic to avoid re-allocation, not necessary for correctness + 1 /* recv */ + args.len() + if needs_block { 1 } else { 0 } + }); + c_args.push(recv); c_args.extend(&args); + if needs_block { + c_args.push(specval); + } - let params = unsafe { iseq.params() }; let num_optionals_passed = if params.flags.has_opt() != 0 { // See vm_call_iseq_setup_normal_opt_start in vm_inshelper.c let lead_num = params.lead_num as u32; @@ -2714,7 +2738,7 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result Ok(jit_entry_ptr) } -/// Compile a stub for an ISEQ called by SendWithoutBlockDirect +/// Compile a stub for an ISEQ called by SendDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result { let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm.new_block_without_id(); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 51ab45937cb5d2..b523d8430f3e5e 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -625,10 +625,10 @@ pub enum SendFallbackReason { SendWithoutBlockNotOptimizedNeedPermission, SendWithoutBlockBopRedefined, SendWithoutBlockOperandsNotFixnum, - SendWithoutBlockDirectKeywordMismatch, - SendWithoutBlockDirectKeywordCountMismatch, - SendWithoutBlockDirectMissingKeyword, - SendWithoutBlockDirectTooManyKeywords, + SendDirectKeywordMismatch, + SendDirectKeywordCountMismatch, + SendDirectMissingKeyword, + SendDirectTooManyKeywords, SendPolymorphic, SendMegamorphic, SendNoProfiles, @@ -686,10 +686,10 @@ impl Display for SendFallbackReason { SendNotOptimizedNeedPermission => write!(f, "Send: method private or protected and no FCALL"), SendWithoutBlockBopRedefined => write!(f, "SendWithoutBlock: basic operation was redefined"), SendWithoutBlockOperandsNotFixnum => write!(f, 
"SendWithoutBlock: operands are not fixnums"), - SendWithoutBlockDirectKeywordMismatch => write!(f, "SendWithoutBlockDirect: keyword mismatch"), - SendWithoutBlockDirectKeywordCountMismatch => write!(f, "SendWithoutBlockDirect: keyword count mismatch"), - SendWithoutBlockDirectMissingKeyword => write!(f, "SendWithoutBlockDirect: missing keyword"), - SendWithoutBlockDirectTooManyKeywords => write!(f, "SendWithoutBlockDirect: too many keywords for fixnum bitmask"), + SendDirectKeywordMismatch => write!(f, "SendDirect: keyword mismatch"), + SendDirectKeywordCountMismatch => write!(f, "SendDirect: keyword count mismatch"), + SendDirectMissingKeyword => write!(f, "SendDirect: missing keyword"), + SendDirectTooManyKeywords => write!(f, "SendDirect: too many keywords for fixnum bitmask"), SendPolymorphic => write!(f, "Send: polymorphic call site"), SendMegamorphic => write!(f, "Send: megamorphic call site"), SendNoProfiles => write!(f, "Send: no profile data available"), @@ -959,13 +959,14 @@ pub enum Insn { }, /// Optimized ISEQ call - SendWithoutBlockDirect { + SendDirect { recv: InsnId, cd: *const rb_call_data, cme: *const rb_callable_method_entry_t, iseq: IseqPtr, args: Vec, kw_bits: u32, + blockiseq: Option, state: InsnId, }, @@ -1193,7 +1194,7 @@ impl Insn { Insn::InvokeSuper { .. } => effects::Any, Insn::InvokeSuperForward { .. } => effects::Any, Insn::InvokeBlock { .. } => effects::Any, - Insn::SendWithoutBlockDirect { .. } => effects::Any, + Insn::SendDirect { .. } => effects::Any, Insn::InvokeBuiltin { .. } => effects::Any, Insn::EntryPoint { .. } => effects::Any, Insn::Return { .. } => effects::Any, @@ -1446,8 +1447,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { write!(f, " # SendFallbackReason: {reason}")?; Ok(()) } - Insn::SendWithoutBlockDirect { recv, cd, iseq, args, .. } => { - write!(f, "SendWithoutBlockDirect {recv}, :{} ({:?})", ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; + Insn::SendDirect { recv, cd, iseq, args, blockiseq, .. 
} => { + write!(f, "SendDirect {recv}, {:p}, :{} ({:?})", self.ptr_map.map_ptr(blockiseq), ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; for arg in args { write!(f, ", {arg}")?; } @@ -1838,7 +1839,8 @@ pub enum ValidationError { MiscValidationError(InsnId, String), } -fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId]) -> bool { +/// Check if we can do a direct send to the given iseq with the given args. +fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq_t, ci: *const rb_callinfo, send_insn: InsnId, args: &[InsnId], blockiseq: Option) -> bool { let mut can_send = true; let mut count_failure = |counter| { can_send = false; @@ -1846,10 +1848,14 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq }; let params = unsafe { iseq.params() }; + let caller_has_literal_block: bool = blockiseq.is_some(); + let callee_has_block_param = 0 != params.flags.has_block(); + use Counter::*; if 0 != params.flags.has_rest() { count_failure(complex_arg_pass_param_rest) } if 0 != params.flags.has_post() { count_failure(complex_arg_pass_param_post) } - if 0 != params.flags.has_block() { count_failure(complex_arg_pass_param_block) } + if callee_has_block_param && !caller_has_literal_block + { count_failure(complex_arg_pass_param_block) } if 0 != params.flags.forwardable() { count_failure(complex_arg_pass_param_forwardable) } if 0 != params.flags.has_kwrest() { count_failure(complex_arg_pass_param_kwrest) } @@ -1884,7 +1890,11 @@ fn can_direct_send(function: &mut Function, block: BlockId, iseq: *const rb_iseq let kwarg = unsafe { rb_vm_ci_kwarg(ci) }; let caller_kw_count = if kwarg.is_null() { 0 } else { (unsafe { get_cikw_keyword_len(kwarg) }) as usize }; let caller_positional = args.len() - caller_kw_count; - let final_argc = caller_positional + kw_total_num as usize; + // Right now, the JIT entrypoint accepts the block as 
a param + // If we remove it, remove the block_arg addition to match + // See: https://github.com/ruby/ruby/pull/15911#discussion_r2710544982 + let block_arg = if 0 != params.flags.has_block() { 1 } else { 0 }; + let final_argc = caller_positional + kw_total_num as usize + block_arg; if final_argc + 1 > C_ARG_OPNDS.len() { // +1 for self function.set_dynamic_send_reason(send_insn, TooManyArgsForLir); return false; @@ -2261,13 +2271,14 @@ impl Function { state, reason, }, - &SendWithoutBlockDirect { recv, cd, cme, iseq, ref args, kw_bits, state } => SendWithoutBlockDirect { + &SendDirect { recv, cd, cme, iseq, ref args, kw_bits, blockiseq, state } => SendDirect { recv: find!(recv), cd, cme, iseq, args: find_vec!(args), kw_bits, + blockiseq, state, }, &Send { recv, cd, blockiseq, ref args, state, reason } => Send { @@ -2499,7 +2510,7 @@ impl Function { Insn::FixnumRShift { .. } => types::Fixnum, Insn::PutSpecialObject { .. } => types::BasicObject, Insn::SendWithoutBlock { .. } => types::BasicObject, - Insn::SendWithoutBlockDirect { .. } => types::BasicObject, + Insn::SendDirect { .. } => types::BasicObject, Insn::Send { .. } => types::BasicObject, Insn::SendForward { .. } => types::BasicObject, Insn::InvokeSuper { .. } => types::BasicObject, @@ -2666,7 +2677,7 @@ impl Function { } /// Prepare arguments for a direct send, handling keyword argument reordering and default synthesis. - /// Returns the (state, processed_args, kw_bits) to use for the SendWithoutBlockDirect instruction, + /// Returns the (state, processed_args, kw_bits) to use for the SendDirect instruction, /// or Err with the fallback reason if direct send isn't possible. fn prepare_direct_send_args( &mut self, @@ -2711,7 +2722,7 @@ impl Function { if callee_keyword.is_null() { if !kwarg.is_null() { // Caller is passing kwargs but callee doesn't expect them. 
- return Err(SendWithoutBlockDirectKeywordMismatch); + return Err(SendDirectKeywordMismatch); } // Neither caller nor callee have keywords - nothing to do return Ok((args.to_vec(), args.len(), 0)); @@ -2724,7 +2735,7 @@ impl Function { // When there are 31+ keywords, CRuby uses a hash instead of a fixnum bitmask // for kw_bits. Fall back to VM dispatch for this rare case. if callee_kw_count >= VM_KW_SPECIFIED_BITS_MAX as usize { - return Err(SendWithoutBlockDirectTooManyKeywords); + return Err(SendDirectTooManyKeywords); } let callee_kw_required = unsafe { (*callee_keyword).required_num } as usize; @@ -2733,7 +2744,7 @@ impl Function { // Caller can't provide more keywords than callee expects (no **kwrest support yet). if caller_kw_count > callee_kw_count { - return Err(SendWithoutBlockDirectKeywordCountMismatch); + return Err(SendDirectKeywordCountMismatch); } // The keyword arguments are the last arguments in the args vector. @@ -2763,7 +2774,7 @@ impl Function { if !found { // Caller is passing an unknown keyword - this will raise ArgumentError. // Fall back to VM dispatch to handle the error. - return Err(SendWithoutBlockDirectKeywordMismatch); + return Err(SendDirectKeywordMismatch); } } @@ -2787,7 +2798,7 @@ impl Function { if !found { // Required keyword not provided by caller which will raise an ArgumentError. if i < callee_kw_required { - return Err(SendWithoutBlockDirectMissingKeyword); + return Err(SendDirectMissingKeyword); } // Optional keyword not provided - use default value @@ -2965,9 +2976,11 @@ impl Function { } } - /// Rewrite SendWithoutBlock opcodes into SendWithoutBlockDirect opcodes if we know the target - /// ISEQ statically. This removes run-time method lookups and opens the door for inlining. + /// Rewrite eligible Send/SendWithoutBlock opcodes into SendDirect + /// opcodes if we know the target ISEQ statically. This removes run-time method lookups and + /// opens the door for inlining. 
/// Also try and inline constant caches, specialize object allocations, and more. + /// Calls to C functions are handled separately in optimize_c_calls. fn type_specialize(&mut self) { for block in self.rpo() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); @@ -3037,7 +3050,7 @@ impl Function { def_type = unsafe { get_cme_def_type(cme) }; } - // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendWithoutBlockDirect`. + // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendDirect`. // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call). if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) { self.count_complex_call_features(block, flags); @@ -3050,15 +3063,20 @@ impl Function { // Only specialize positional-positional calls // TODO(max): Handle other kinds of parameter passing let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), None) { self.push_insn_id(block, insn_id); continue; } + // Check singleton class assumption first, before emitting other patchpoints if !self.assume_no_singleton_classes(block, klass, state) { self.set_dynamic_send_reason(insn_id, SingletonClassSeen); self.push_insn_id(block, insn_id); continue; } + + // Add PatchPoint for method redefinition self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); + + // Add GuardType for profiled receiver if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); } @@ -3068,7 +3086,7 @@ impl Function { self.push_insn_id(block, insn_id); continue; }; - let send_direct = 
self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state }); + let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, blockiseq: None }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_BMETHOD { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; @@ -3083,11 +3101,9 @@ impl Function { let capture = unsafe { proc_block.as_.captured.as_ref() }; let iseq = unsafe { *capture.code.iseq.as_ref() }; - if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice()) { + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), None) { self.push_insn_id(block, insn_id); continue; } - // Can't pass a block to a block for now - assert!((unsafe { rb_vm_ci_flag(ci) } & VM_CALL_ARGS_BLOCKARG) == 0, "SendWithoutBlock but has a block arg"); // Patch points: // Check for "defined with an un-shareable Proc in a different Ractor" @@ -3111,7 +3127,7 @@ impl Function { self.push_insn_id(block, insn_id); continue; }; - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state }); + let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, blockiseq: None }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_IVAR && args.is_empty() { // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. @@ -3240,14 +3256,12 @@ impl Function { self.push_insn_id(block, insn_id); continue; } } - // This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise. - // The actual optimization is done in reduce_send_to_ccall. - Insn::Send { recv, cd, state, .. } => { + Insn::Send { mut recv, cd, state, blockiseq, args, .. 
} => { let frame_state = self.frame_state(state); - let klass = match self.resolve_receiver_type(recv, self.type_of(recv), frame_state.insn_idx) { - ReceiverTypeResolution::StaticallyKnown { class } => class, + let (klass, profiled_type) = match self.resolve_receiver_type(recv, self.type_of(recv), frame_state.insn_idx) { + ReceiverTypeResolution::StaticallyKnown { class } => (class, None), ReceiverTypeResolution::Monomorphic { profiled_type } - | ReceiverTypeResolution::SkewedPolymorphic { profiled_type } => profiled_type.class(), + | ReceiverTypeResolution::SkewedPolymorphic { profiled_type } => (profiled_type.class(), Some(profiled_type)), ReceiverTypeResolution::SkewedMegamorphic { .. } | ReceiverTypeResolution::Megamorphic => { if get_option!(stats) { @@ -3272,6 +3286,9 @@ impl Function { } }; let ci = unsafe { get_call_data_ci(cd) }; // info about the call site + + let flags = unsafe { rb_vm_ci_flag(ci) }; + let mid = unsafe { vm_ci_mid(ci) }; // Do method lookup let mut cme = unsafe { rb_callable_method_entry(klass, mid) }; @@ -3282,13 +3299,70 @@ impl Function { // Load an overloaded cme if applicable. See vm_search_cc(). // It allows you to use a faster ISEQ if possible. 
cme = unsafe { rb_check_overloaded_cme(cme, ci) }; + let visibility = unsafe { METHOD_ENTRY_VISI(cme) }; + match (visibility, flags & VM_CALL_FCALL != 0) { + (METHOD_VISI_PUBLIC, _) => {} + (METHOD_VISI_PRIVATE, true) => {} + (METHOD_VISI_PROTECTED, true) => {} + _ => { + self.set_dynamic_send_reason(insn_id, SendNotOptimizedNeedPermission); + self.push_insn_id(block, insn_id); continue; + } + } let mut def_type = unsafe { get_cme_def_type(cme) }; while def_type == VM_METHOD_TYPE_ALIAS { cme = unsafe { rb_aliased_callable_method_entry(cme) }; def_type = unsafe { get_cme_def_type(cme) }; } - self.set_dynamic_send_reason(insn_id, SendNotOptimizedMethodType(MethodType::from(def_type))); - self.push_insn_id(block, insn_id); continue; + + // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendDirect`. + // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call). + if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) { + self.count_complex_call_features(block, flags); + self.set_dynamic_send_reason(insn_id, ComplexArgPass); + self.push_insn_id(block, insn_id); continue; + } + + if def_type == VM_METHOD_TYPE_ISEQ { + let iseq = unsafe { get_def_iseq_ptr((*cme).def) }; + if !can_direct_send(self, block, iseq, ci, insn_id, args.as_slice(), Some(blockiseq)) { + self.push_insn_id(block, insn_id); continue; + } + + // Check singleton class assumption first, before emitting other patchpoints + if !self.assume_no_singleton_classes(block, klass, state) { + self.set_dynamic_send_reason(insn_id, SingletonClassSeen); + self.push_insn_id(block, insn_id); continue; + } + + // Add PatchPoint for method redefinition + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); + + // Add GuardType for profiled receiver + if let Some(profiled_type) = profiled_type { + recv = 
self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + } + + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; + + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme, + iseq, + args: processed_args, + kw_bits, + blockiseq: Some(blockiseq), + state: send_state, + }); + self.make_equal_to(insn_id, send_direct); + } else { + self.set_dynamic_send_reason(insn_id, SendNotOptimizedMethodType(MethodType::from(def_type))); + self.push_insn_id(block, insn_id); continue; + } } Insn::GetConstantPath { ic, state, .. } => { let idlist: *const ID = unsafe { (*ic).segments }; @@ -3464,7 +3538,8 @@ impl Function { // Check if the super method's parameters support direct send. // If not, we can't do direct dispatch. let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice()) { + // TODO: pass Option to can_direct_send when we start specializing super { ... } + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { self.push_insn_id(block, insn_id); self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); continue; @@ -3502,15 +3577,16 @@ impl Function { self.push_insn_id(block, insn_id); continue; }; - // Use SendWithoutBlockDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { + // Use SendDirect with the super method's CME and ISEQ. 
+ let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme: super_cme, iseq: super_iseq, args: processed_args, kw_bits, - state: send_state + state: send_state, + blockiseq: None, }); self.make_equal_to(insn_id, send_direct); } @@ -3528,7 +3604,7 @@ impl Function { for insn_id in old_insns { match self.find(insn_id) { // Reject block ISEQs to avoid autosplat and other block parameter complications. - Insn::SendWithoutBlockDirect { recv, iseq, cd, args, state, .. } => { + Insn::SendDirect { recv, iseq, cd, args, state, blockiseq: None, .. } => { let call_info = unsafe { (*cd).ci }; let ci_flags = unsafe { vm_ci_flag(call_info) }; // .send call is not currently supported for builtins @@ -3776,7 +3852,7 @@ impl Function { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme }, state }); } - /// Optimize SendWithoutBlock that land in a C method to a direct CCall without + /// Optimize Send/SendWithoutBlock that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { if unsafe { rb_zjit_method_tracing_currently_enabled() } { @@ -3841,7 +3917,10 @@ impl Function { // When seeing &block argument, fall back to dynamic dispatch for now // TODO: Support block forwarding if unspecializable_c_call_type(ci_flags) { - fun.count_complex_call_features(block, ci_flags); + // Only count features NOT already counted in type_specialize. + if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } @@ -4011,7 +4090,10 @@ impl Function { // Filter for simple call sites (i.e. no splats etc.) if ci_flags & VM_CALL_ARGS_SIMPLE == 0 { - fun.count_complex_call_features(block, ci_flags); + // Only count features NOT already counted in type_specialize. 
+ if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } @@ -4091,8 +4173,10 @@ impl Function { // func(int argc, VALUE *argv, VALUE recv) let ci_flags = unsafe { vm_ci_flag(call_info) }; if ci_flags & VM_CALL_ARGS_SIMPLE == 0 { - // TODO(alan): Add fun.count_complex_call_features() here without double - // counting splat + // Only count features NOT already counted in type_specialize. + if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); return Err(()); } else { @@ -4616,7 +4700,7 @@ impl Function { | &Insn::SendWithoutBlock { recv, ref args, state, .. } | &Insn::CCallVariadic { recv, ref args, state, .. } | &Insn::CCallWithFrame { recv, ref args, state, .. } - | &Insn::SendWithoutBlockDirect { recv, ref args, state, .. } + | &Insn::SendDirect { recv, ref args, state, .. } | &Insn::InvokeBuiltin { recv, ref args, state, .. } | &Insn::InvokeSuper { recv, ref args, state, .. } | &Insn::InvokeSuperForward { recv, ref args, state, .. } @@ -5276,7 +5360,7 @@ impl Function { } // Instructions with recv and a Vec of Ruby objects Insn::SendWithoutBlock { recv, ref args, .. } - | Insn::SendWithoutBlockDirect { recv, ref args, .. } + | Insn::SendDirect { recv, ref args, .. } | Insn::Send { recv, ref args, .. } | Insn::SendForward { recv, ref args, .. } | Insn::InvokeSuper { recv, ref args, .. 
} diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 0110af3f2c4c5d..70afd54022e40e 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -701,7 +701,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -795,7 +795,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -913,7 +913,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -941,7 +941,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, Integer@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :Integer (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :Integer (0x1048), v11 CheckInterrupts Return v21 "); @@ -971,7 +971,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, 
cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -1001,11 +1001,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v23:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v24:BasicObject = SendWithoutBlockDirect v23, :foo (0x1038) + v24:BasicObject = SendDirect v23, 0x1038, :foo (0x1048) PatchPoint NoSingletonClass(Object@0x1000) - PatchPoint MethodRedefined(Object@0x1000, bar@0x1040, cme:0x1048) + PatchPoint MethodRedefined(Object@0x1000, bar@0x1050, cme:0x1058) v27:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v28:BasicObject = SendWithoutBlockDirect v27, :bar (0x1038) + v28:BasicObject = SendDirect v27, 0x1038, :bar (0x1048) CheckInterrupts Return v28 "); @@ -1031,7 +1031,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v19:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038) + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts Return v19 "); @@ -1058,7 +1058,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -1086,7 +1086,7 @@ mod hir_opt_tests 
{ PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -1113,14 +1113,14 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v44:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v45:BasicObject = SendWithoutBlockDirect v44, :target (0x1038) + v45:BasicObject = SendDirect v44, 0x1038, :target (0x1048) v14:Fixnum[10] = Const Value(10) v16:Fixnum[20] = Const Value(20) v18:Fixnum[30] = Const Value(30) PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v48:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v49:BasicObject = SendWithoutBlockDirect v48, :target (0x1038), v14, v16, v18 + v49:BasicObject = SendDirect v48, 0x1038, :target (0x1048), v14, v16, v18 v24:Fixnum[10] = Const Value(10) v26:Fixnum[20] = Const Value(20) v28:Fixnum[30] = Const Value(30) @@ -2865,20 +2865,21 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) v21:HeapObject[class_exact:C] = GuardType v9, HeapObject[class_exact:C] - v22:BasicObject = SendWithoutBlockDirect v21, :foo (0x1038) + v22:BasicObject = SendDirect v21, 0x1038, :foo (0x1048) CheckInterrupts Return v22 "); } #[test] - fn dont_specialize_call_to_iseq_with_block() { - eval(" - def foo(&block) = 1 - def test = foo {|| } + fn test_send_direct_iseq_with_block() { + let result = eval(" + def foo(a, b, &block) = block.call(a, b) + def test = foo(1, 2) { |a, b| a + b } test 
test "); + assert_eq!(VALUE::fixnum_from_usize(3), result); assert_snapshot!(hir_string("test"), @r" fn test@:3: bb0(): @@ -2889,9 +2890,14 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = Send v6, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + v11:Fixnum[1] = Const Value(1) + v13:Fixnum[2] = Const Value(2) + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts - Return v11 + Return v23 "); } @@ -2921,7 +2927,10 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:NilClass): v13:Fixnum[1] = Const Value(1) SetLocal :a, l0, EP@3, v13 - v19:BasicObject = Send v8, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v31:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v8, HeapObject[class_exact*:Object@VALUE(0x1000)] + v32:BasicObject = SendDirect v31, 0x1038, :foo (0x1048) v20:BasicObject = GetLocal :a, l0, EP@3 v24:BasicObject = GetLocal :a, l0, EP@3 CheckInterrupts @@ -3003,7 +3012,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -3033,7 +3042,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v24:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, 
HeapObject[class_exact*:Object@VALUE(0x1000)] - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1038), v13, v15, v11 + v26:BasicObject = SendDirect v24, 0x1038, :foo (0x1048), v13, v15, v11 CheckInterrupts Return v26 "); @@ -3063,7 +3072,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v24:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1038), v11, v15, v13 + v26:BasicObject = SendDirect v24, 0x1038, :foo (0x1048), v11, v15, v13 CheckInterrupts Return v26 "); @@ -3092,7 +3101,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v22:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1038), v11, v13 + v23:BasicObject = SendDirect v22, 0x1038, :foo (0x1048), v11, v13 CheckInterrupts Return v23 "); @@ -3122,7 +3131,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v37:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v38:BasicObject = SendWithoutBlockDirect v37, :foo (0x1038), v11, v13, v15 + v38:BasicObject = SendDirect v37, 0x1038, :foo (0x1048), v11, v13, v15 v20:Fixnum[1] = Const Value(1) v22:Fixnum[2] = Const Value(2) v24:Fixnum[4] = Const Value(4) @@ -3130,7 +3139,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v41:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v43:BasicObject = SendWithoutBlockDirect v41, :foo (0x1038), v20, v22, v26, v24 + v43:BasicObject = SendDirect v41, 0x1038, 
:foo (0x1048), v20, v22, v26, v24 v30:ArrayExact = NewArray v38, v43 CheckInterrupts Return v30 @@ -3161,7 +3170,7 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v35:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] v36:Fixnum[4] = Const Value(4) - v38:BasicObject = SendWithoutBlockDirect v35, :foo (0x1038), v11, v13, v36 + v38:BasicObject = SendDirect v35, 0x1038, :foo (0x1048), v11, v13, v36 v18:Fixnum[1] = Const Value(1) v20:Fixnum[2] = Const Value(2) v22:Fixnum[40] = Const Value(40) @@ -3169,7 +3178,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v41:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v43:BasicObject = SendWithoutBlockDirect v41, :foo (0x1038), v18, v20, v24, v22 + v43:BasicObject = SendDirect v41, 0x1038, :foo (0x1048), v18, v20, v24, v22 v28:ArrayExact = NewArray v38, v43 CheckInterrupts Return v28 @@ -3198,7 +3207,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v48:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v49:BasicObject = SendWithoutBlockDirect v48, :target (0x1038), v11 + v49:BasicObject = SendDirect v48, 0x1038, :target (0x1048), v11 v16:Fixnum[10] = Const Value(10) v18:Fixnum[20] = Const Value(20) v20:Fixnum[30] = Const Value(30) @@ -3206,7 +3215,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, target@0x1008, cme:0x1010) v52:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v53:BasicObject = SendWithoutBlockDirect v52, :target (0x1038), v16, v18, v20, v22 + v53:BasicObject = SendDirect v52, 0x1038, :target (0x1048), v16, 
v18, v20, v22 v27:Fixnum[10] = Const Value(10) v29:Fixnum[20] = Const Value(20) v31:Fixnum[30] = Const Value(30) @@ -3242,7 +3251,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] - v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + v21:BasicObject = SendDirect v20, 0x1038, :foo (0x1048), v11 CheckInterrupts Return v21 "); @@ -3296,7 +3305,7 @@ mod hir_opt_tests { PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] v19:Fixnum[1] = Const Value(1) - v21:BasicObject = SendWithoutBlockDirect v18, :foo (0x1038), v19 + v21:BasicObject = SendDirect v18, 0x1038, :foo (0x1048), v19 CheckInterrupts Return v21 "); @@ -3374,6 +3383,7 @@ mod hir_opt_tests { v11:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v12:StringExact = StringCopy v11 v14:Fixnum[1] = Const Value(1) + IncrCounter complex_arg_pass_caller_kwarg v16:BasicObject = SendWithoutBlock v6, :sprintf, v12, v14 # SendFallbackReason: Complex argument passing CheckInterrupts Return v16 @@ -3578,7 +3588,7 @@ mod hir_opt_tests { v49:HeapObject[class_exact:C] = ObjectAllocClass C:VALUE(0x1008) PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, initialize@0x1038, cme:0x1040) - v52:BasicObject = SendWithoutBlockDirect v49, :initialize (0x1068), v16 + v52:BasicObject = SendDirect v49, 0x1068, :initialize (0x1078), v16 CheckInterrupts CheckInterrupts Return v49 @@ -5312,7 +5322,7 @@ mod hir_opt_tests { v13:Fixnum[10] = Const Value(10) PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018) - v23:BasicObject = SendWithoutBlockDirect v11, :[] (0x1040), v13 + v23:BasicObject = SendDirect v11, 0x1040, :[] (0x1050), v13 
CheckInterrupts Return v23 "); @@ -5370,7 +5380,7 @@ mod hir_opt_tests { v11:ArrayExact = ArrayDup v10 PatchPoint NoSingletonClass(Array@0x1008) PatchPoint MethodRedefined(Array@0x1008, max@0x1010, cme:0x1018) - v20:BasicObject = SendWithoutBlockDirect v11, :max (0x1040) + v20:BasicObject = SendDirect v11, 0x1040, :max (0x1050) CheckInterrupts Return v20 "); @@ -6534,15 +6544,16 @@ mod hir_opt_tests { } #[test] - fn test_do_not_optimize_send_to_iseq_method_with_block() { - eval(r#" + fn test_send_direct_iseq_with_block_no_callee_block_param() { + let result = eval(r#" def foo yield 1 end - def test = foo {} + def test = foo { |x| x * 2 } test; test "#); + assert_eq!(VALUE::fixnum_from_usize(2), result); assert_snapshot!(hir_string("test"), @r" fn test@:6: bb0(): @@ -6553,9 +6564,12 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = Send v6, 0x1000, :foo # SendFallbackReason: Send: unsupported method type Iseq + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v19:BasicObject = SendDirect v18, 0x1038, :foo (0x1048) CheckInterrupts - Return v11 + Return v19 "); } @@ -11029,8 +11043,8 @@ mod hir_opt_tests { // A Ruby method as the target of `super` should optimize provided no block is given. 
let hir = hir_string_proc("B.new.method(:foo)"); - assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); - assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendDirect but got:\n{hir}"); + assert!(hir.contains("SendDirect"), "Should optimize to SendDirect for call without args or block:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11047,7 +11061,7 @@ mod hir_opt_tests { GuardSuperMethodEntry v17, 0x1038 v19:RubyValue = GetBlockHandler v17 v20:FalseClass = GuardBitEquals v19, Value(false) - v21:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v21:BasicObject = SendDirect v6, 0x1040, :foo (0x1050) CheckInterrupts Return v21 "); @@ -11072,8 +11086,8 @@ mod hir_opt_tests { "); let hir = hir_string_proc("B.new.method(:foo)"); - assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendWithoutBlockDirect but got:\n{hir}"); - assert!(hir.contains("SendWithoutBlockDirect"), "Should optimize to SendWithoutBlockDirect for call without args or block:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to SendDirect but got:\n{hir}"); + assert!(hir.contains("SendDirect"), "Should optimize to SendDirect for call without args or block:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11091,9 +11105,9 @@ mod hir_opt_tests { GuardSuperMethodEntry v26, 0x1038 v28:RubyValue = GetBlockHandler v26 v29:FalseClass = GuardBitEquals v28, Value(false) - v30:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v9 + v30:BasicObject = SendDirect v8, 0x1040, :foo (0x1050), v9 v17:Fixnum[1] = Const Value(1) - PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058) + PatchPoint MethodRedefined(Integer@0x1058, +@0x1060, cme:0x1068) v33:Fixnum = GuardType v30, Fixnum v34:Fixnum = FixnumAdd v33, v17 IncrCounter 
inline_cfunc_optimized_send_count @@ -11122,7 +11136,7 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for explicit blockarg:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11162,9 +11176,9 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for block literal:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for block literal:\n{hir}"); - // With a block, we don't optimize to SendWithoutBlockDirect + // With a block, we don't optimize to SendDirect assert_snapshot!(hir, @r" fn foo@:10: bb0(): @@ -11195,7 +11209,7 @@ mod hir_opt_tests { let hir = hir_string_proc("MyArray.new.method(:length)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for CFUNC:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for CFUNC:\n{hir}"); assert_snapshot!(hir, @r" fn length@:4: @@ -11234,7 +11248,7 @@ mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for explicit blockarg:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for explicit blockarg:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: @@ -11282,7 +11296,7 @@ 
mod hir_opt_tests { let hir = hir_string_proc("B.new.method(:foo)"); assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendWithoutBlockDirect"), "Should not optimize to SendWithoutBlockDirect for symbol-to-proc:\n{hir}"); + assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for symbol-to-proc:\n{hir}"); assert_snapshot!(hir, @r" fn foo@:10: diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index c21402449f52fb..e0b0129ea1ce5a 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -80,8 +80,8 @@ mod snapshot_tests { PatchPoint MethodRedefined(Object@0x1010, foo@0x1018, cme:0x1020) v24:HeapObject[class_exact*:Object@VALUE(0x1010)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1010)] v25:Any = Snapshot FrameState { pc: 0x1008, stack: [v6, v13, v15, v11], locals: [] } - v26:BasicObject = SendWithoutBlockDirect v24, :foo (0x1048), v13, v15, v11 - v18:Any = Snapshot FrameState { pc: 0x1050, stack: [v26], locals: [] } + v26:BasicObject = SendDirect v24, 0x1048, :foo (0x1058), v13, v15, v11 + v18:Any = Snapshot FrameState { pc: 0x1060, stack: [v26], locals: [] } PatchPoint NoTracePoint CheckInterrupts Return v26 @@ -114,8 +114,8 @@ mod snapshot_tests { PatchPoint NoSingletonClass(Object@0x1010) PatchPoint MethodRedefined(Object@0x1010, foo@0x1018, cme:0x1020) v22:HeapObject[class_exact*:Object@VALUE(0x1010)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1010)] - v23:BasicObject = SendWithoutBlockDirect v22, :foo (0x1048), v11, v13 - v16:Any = Snapshot FrameState { pc: 0x1050, stack: [v23], locals: [] } + v23:BasicObject = SendDirect v22, 0x1048, :foo (0x1058), v11, v13 + v16:Any = Snapshot FrameState { pc: 0x1060, stack: [v23], locals: [] } PatchPoint NoTracePoint CheckInterrupts Return v23 diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 96d75b7aec84b7..bb11b96dd9a403 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -599,10 +599,10 
@@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter TooManyArgsForLir => send_fallback_too_many_args_for_lir, SendWithoutBlockBopRedefined => send_fallback_send_without_block_bop_redefined, SendWithoutBlockOperandsNotFixnum => send_fallback_send_without_block_operands_not_fixnum, - SendWithoutBlockDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, - SendWithoutBlockDirectKeywordCountMismatch=> send_fallback_send_without_block_direct_keyword_count_mismatch, - SendWithoutBlockDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword, - SendWithoutBlockDirectTooManyKeywords => send_fallback_send_without_block_direct_too_many_keywords, + SendDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, + SendDirectKeywordCountMismatch => send_fallback_send_without_block_direct_keyword_count_mismatch, + SendDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword, + SendDirectTooManyKeywords => send_fallback_send_without_block_direct_too_many_keywords, SendPolymorphic => send_fallback_send_polymorphic, SendMegamorphic => send_fallback_send_megamorphic, SendNoProfiles => send_fallback_send_no_profiles, From a8b877a843643fbdccd1a42efaf94ad27705dd55 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 28 Jan 2026 22:30:20 +0100 Subject: [PATCH 40/77] Update to ruby/mspec@5470479 --- spec/mspec/tool/sync/sync-rubyspec.rb | 6 +++--- spec/mspec/tool/tag_from_output.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/mspec/tool/sync/sync-rubyspec.rb b/spec/mspec/tool/sync/sync-rubyspec.rb index 617123733e5442..122de0decba9f6 100644 --- a/spec/mspec/tool/sync/sync-rubyspec.rb +++ b/spec/mspec/tool/sync/sync-rubyspec.rb @@ -190,20 +190,20 @@ def test_new_specs Dir.chdir(SOURCE_REPO) do workflow = YAML.load_file(".github/workflows/ci.yml") job_name = MSPEC ? 
"test" : "specs" - versions = workflow.dig("jobs", job_name, "strategy", "matrix", "ruby") + versions = workflow.dig("jobs", job_name, "strategy", "matrix", "ruby").map(&:to_s) versions = versions.grep(/^\d+\./) # Test on MRI min_version, max_version = versions.minmax test_command = MSPEC ? "bundle install && bundle exec rspec" : "../mspec/bin/mspec -j" run_test = -> version { - command = "chruby #{version} && #{test_command}" + command = "chruby ruby-#{version} && #{test_command}" sh ENV["SHELL"], "-c", command } run_test[min_version] run_test[max_version] - run_test["ruby-master"] if TEST_MASTER + run_test["master"] if TEST_MASTER end end diff --git a/spec/mspec/tool/tag_from_output.rb b/spec/mspec/tool/tag_from_output.rb index b6b46038556ae1..41aa70f932057f 100755 --- a/spec/mspec/tool/tag_from_output.rb +++ b/spec/mspec/tool/tag_from_output.rb @@ -20,7 +20,7 @@ NUMBER = /^\d+\)$/ ERROR_OR_FAILED = / (ERROR|FAILED)$/ -SPEC_FILE = /^(\/.+_spec\.rb)\:\d+/ +SPEC_FILE = /^((?:\/|[CD]:\/).+_spec\.rb)\:\d+/ output.slice_before(NUMBER).select { |number, *rest| number =~ NUMBER and rest.any? 
{ |line| line =~ ERROR_OR_FAILED } From dbd2ff7adca9b49e4bfa7bc3ec8b83bd437f8cb7 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 28 Jan 2026 22:30:21 +0100 Subject: [PATCH 41/77] Update to ruby/spec@83e26c9 --- spec/ruby/README.md | 1 + spec/ruby/command_line/dash_r_spec.rb | 5 +- spec/ruby/command_line/syntax_error_spec.rb | 10 +- spec/ruby/core/array/fetch_spec.rb | 8 +- spec/ruby/core/array/pack/c_spec.rb | 18 +- spec/ruby/core/array/pack/shared/basic.rb | 21 +- spec/ruby/core/array/pack/shared/float.rb | 72 +--- spec/ruby/core/array/pack/shared/integer.rb | 114 ++---- spec/ruby/core/array/pack/shared/unicode.rb | 18 +- spec/ruby/core/array/pack/w_spec.rb | 18 +- spec/ruby/core/array/rassoc_spec.rb | 22 +- spec/ruby/core/array/sum_spec.rb | 12 +- .../core/basicobject/instance_eval_spec.rb | 8 +- spec/ruby/core/binding/eval_spec.rb | 14 +- .../builtin_constants_spec.rb | 80 ++-- spec/ruby/core/data/with_spec.rb | 12 +- spec/ruby/core/dir/chdir_spec.rb | 144 ++++--- spec/ruby/core/dir/close_spec.rb | 2 +- spec/ruby/core/dir/fchdir_spec.rb | 108 +++--- spec/ruby/core/dir/for_fd_spec.rb | 110 +++--- .../core/encoding/ascii_compatible_spec.rb | 11 + spec/ruby/core/encoding/dummy_spec.rb | 11 + spec/ruby/core/encoding/replicate_spec.rb | 84 +---- spec/ruby/core/enumerator/each_spec.rb | 15 + .../core/exception/no_method_error_spec.rb | 261 +++++-------- spec/ruby/core/false/singleton_method_spec.rb | 14 +- spec/ruby/core/fiber/kill_spec.rb | 120 +++--- spec/ruby/core/fiber/storage_spec.rb | 12 +- spec/ruby/core/file/basename_spec.rb | 6 +- spec/ruby/core/file/dirname_spec.rb | 33 ++ spec/ruby/core/float/ceil_spec.rb | 4 +- spec/ruby/core/float/floor_spec.rb | 4 +- spec/ruby/core/float/round_spec.rb | 69 ++-- spec/ruby/core/gc/config_spec.rb | 14 + spec/ruby/core/hash/compact_spec.rb | 38 +- spec/ruby/core/hash/constructor_spec.rb | 30 +- spec/ruby/core/hash/new_spec.rb | 2 +- .../core/hash/ruby2_keywords_hash_spec.rb | 12 +- spec/ruby/core/hash/shared/to_s.rb 
| 31 ++ spec/ruby/core/integer/ceil_spec.rb | 11 - .../integer/shared/integer_ceil_precision.rb | 25 +- .../integer/shared/integer_floor_precision.rb | 9 +- spec/ruby/core/io/binread_spec.rb | 2 +- spec/ruby/core/io/buffer/empty_spec.rb | 8 +- spec/ruby/core/io/buffer/external_spec.rb | 103 +---- spec/ruby/core/io/buffer/for_spec.rb | 94 +++++ spec/ruby/core/io/buffer/free_spec.rb | 20 +- spec/ruby/core/io/buffer/initialize_spec.rb | 56 ++- spec/ruby/core/io/buffer/internal_spec.rb | 103 +---- spec/ruby/core/io/buffer/map_spec.rb | 357 ++++++++++++++++++ spec/ruby/core/io/buffer/mapped_spec.rb | 103 +---- spec/ruby/core/io/buffer/null_spec.rb | 8 +- spec/ruby/core/io/buffer/private_spec.rb | 120 +----- spec/ruby/core/io/buffer/readonly_spec.rb | 139 +------ spec/ruby/core/io/buffer/resize_spec.rb | 30 +- .../core/io/buffer/shared/null_and_empty.rb | 8 +- spec/ruby/core/io/buffer/shared_spec.rb | 115 +----- spec/ruby/core/io/buffer/string_spec.rb | 62 +++ spec/ruby/core/io/buffer/transfer_spec.rb | 18 +- spec/ruby/core/io/foreach_spec.rb | 14 +- spec/ruby/core/io/gets_spec.rb | 24 +- spec/ruby/core/io/pread_spec.rb | 216 ++++++----- spec/ruby/core/io/pwrite_spec.rb | 104 +++-- spec/ruby/core/io/read_spec.rb | 46 +-- spec/ruby/core/io/readlines_spec.rb | 14 +- spec/ruby/core/io/select_spec.rb | 30 +- spec/ruby/core/io/shared/readlines.rb | 8 +- spec/ruby/core/io/write_spec.rb | 9 +- spec/ruby/core/kernel/Integer_spec.rb | 17 +- spec/ruby/core/kernel/caller_spec.rb | 17 +- spec/ruby/core/kernel/eval_spec.rb | 40 +- spec/ruby/core/kernel/lambda_spec.rb | 52 +-- spec/ruby/core/kernel/open_spec.rb | 14 +- spec/ruby/core/kernel/shared/require.rb | 16 +- spec/ruby/core/kernel/sleep_spec.rb | 28 +- spec/ruby/core/marshal/shared/load.rb | 68 ++-- .../core/matchdata/named_captures_spec.rb | 16 +- spec/ruby/core/math/log10_spec.rb | 4 + .../core/module/set_temporary_name_spec.rb | 218 ++++++----- spec/ruby/core/module/shared/class_eval.rb | 6 +- 
spec/ruby/core/nil/singleton_method_spec.rb | 14 +- spec/ruby/core/numeric/remainder_spec.rb | 4 +- .../core/objectspace/weakkeymap/clear_spec.rb | 34 +- .../objectspace/weakkeymap/delete_spec.rb | 78 ++-- .../weakkeymap/element_reference_spec.rb | 202 +++++----- .../weakkeymap/element_set_spec.rb | 124 +++--- .../objectspace/weakkeymap/getkey_spec.rb | 38 +- .../objectspace/weakkeymap/inspect_spec.rb | 30 +- .../core/objectspace/weakkeymap/key_spec.rb | 68 ++-- .../core/objectspace/weakmap/delete_spec.rb | 42 +-- spec/ruby/core/proc/clone_spec.rb | 20 +- spec/ruby/core/proc/dup_spec.rb | 18 +- spec/ruby/core/proc/lambda_spec.rb | 7 - spec/ruby/core/process/argv0_spec.rb | 6 +- spec/ruby/core/process/status/bit_and_spec.rb | 2 +- .../core/process/status/right_shift_spec.rb | 2 +- spec/ruby/core/process/warmup_spec.rb | 10 +- spec/ruby/core/range/case_compare_spec.rb | 6 +- spec/ruby/core/range/overlap_spec.rb | 168 ++++----- spec/ruby/core/range/reverse_each_spec.rb | 172 +++++---- spec/ruby/core/range/to_set_spec.rb | 59 ++- spec/ruby/core/rational/ceil_spec.rb | 49 +-- spec/ruby/core/rational/exponent_spec.rb | 16 +- spec/ruby/core/rational/floor_spec.rb | 50 +-- .../core/refinement/refined_class_spec.rb | 6 +- spec/ruby/core/refinement/target_spec.rb | 4 +- spec/ruby/core/regexp/linear_time_spec.rb | 6 +- spec/ruby/core/set/flatten_spec.rb | 10 - spec/ruby/core/set/merge_spec.rb | 12 +- spec/ruby/core/set/proper_subset_spec.rb | 10 - spec/ruby/core/set/subset_spec.rb | 10 - spec/ruby/core/string/append_as_bytes_spec.rb | 4 +- spec/ruby/core/string/bytesplice_spec.rb | 316 ++++++++-------- spec/ruby/core/string/index_spec.rb | 29 +- spec/ruby/core/string/shared/chars.rb | 22 +- spec/ruby/core/string/shared/codepoints.rb | 5 + spec/ruby/core/string/shared/each_line.rb | 14 + .../core/string/shared/grapheme_clusters.rb | 9 + spec/ruby/core/string/start_with_spec.rb | 15 +- spec/ruby/core/string/tr_s_spec.rb | 12 +- spec/ruby/core/string/tr_spec.rb | 12 +- 
spec/ruby/core/string/unpack/b_spec.rb | 36 +- spec/ruby/core/string/unpack/c_spec.rb | 18 +- spec/ruby/core/string/unpack/h_spec.rb | 36 +- spec/ruby/core/string/unpack/shared/basic.rb | 18 +- spec/ruby/core/string/unpack/shared/float.rb | 74 +--- .../ruby/core/string/unpack/shared/integer.rb | 110 ++---- .../ruby/core/string/unpack/shared/unicode.rb | 18 +- spec/ruby/core/string/unpack/w_spec.rb | 18 +- spec/ruby/core/struct/new_spec.rb | 16 +- spec/ruby/core/symbol/inspect_spec.rb | 19 + .../backtrace/location/fixtures/classes.rb | 104 +++++ .../thread/backtrace/location/label_spec.rb | 192 +++++++++- .../ruby/core/thread/native_thread_id_spec.rb | 8 +- spec/ruby/core/time/new_spec.rb | 18 +- spec/ruby/core/tracepoint/path_spec.rb | 31 +- .../core/tracepoint/raised_exception_spec.rb | 26 +- spec/ruby/core/true/singleton_method_spec.rb | 14 +- .../core/unboundmethod/equal_value_spec.rb | 3 - .../core/warning/element_reference_spec.rb | 8 +- spec/ruby/core/warning/element_set_spec.rb | 16 +- spec/ruby/language/assignments_spec.rb | 10 +- spec/ruby/language/block_spec.rb | 53 ++- spec/ruby/language/delegation_spec.rb | 46 +-- spec/ruby/language/file_spec.rb | 12 +- spec/ruby/language/for_spec.rb | 55 ++- spec/ruby/language/hash_spec.rb | 29 +- spec/ruby/language/it_parameter_spec.rb | 46 ++- spec/ruby/language/keyword_arguments_spec.rb | 16 +- spec/ruby/language/method_spec.rb | 6 +- spec/ruby/library/English/English_spec.rb | 12 - .../ruby/library/bigdecimal/remainder_spec.rb | 19 - spec/ruby/library/bigdecimal/to_s_spec.rb | 6 +- .../random/formatter/alphanumeric_spec.rb | 18 +- spec/ruby/library/ripper/lex_spec.rb | 6 +- .../socket/addrinfo/initialize_spec.rb | 36 +- .../socket/basicsocket/recv_nonblock_spec.rb | 66 +--- .../library/socket/basicsocket/recv_spec.rb | 43 +-- .../basicsocket/recvmsg_nonblock_spec.rb | 69 +--- .../socket/basicsocket/recvmsg_spec.rb | 48 +-- .../library/socket/ipsocket/recvfrom_spec.rb | 44 +-- 
.../library/socket/socket/getaddrinfo_spec.rb | 22 +- .../library/socket/socket/getnameinfo_spec.rb | 22 +- .../socket/socket/recvfrom_nonblock_spec.rb | 67 +--- .../library/socket/socket/recvfrom_spec.rb | 44 +-- .../stringscanner/named_captures_spec.rb | 8 +- spec/ruby/optional/capi/encoding_spec.rb | 30 ++ spec/ruby/optional/capi/ext/encoding_spec.c | 5 + spec/ruby/optional/capi/ext/kernel_spec.c | 11 + spec/ruby/optional/capi/ext/string_spec.c | 10 + spec/ruby/optional/capi/io_spec.rb | 240 ++++++------ spec/ruby/optional/capi/kernel_spec.rb | 6 + spec/ruby/optional/capi/object_spec.rb | 2 - spec/ruby/optional/capi/spec_helper.rb | 6 +- spec/ruby/optional/capi/string_spec.rb | 125 ++++++ spec/ruby/optional/capi/struct_spec.rb | 142 ++++--- spec/ruby/security/cve_2020_10663_spec.rb | 2 +- spec/ruby/shared/kernel/at_exit.rb | 5 +- spec/ruby/shared/queue/freeze.rb | 18 +- spec/ruby/shared/string/start_with.rb | 12 +- 180 files changed, 3735 insertions(+), 4100 deletions(-) create mode 100644 spec/ruby/core/io/buffer/for_spec.rb create mode 100644 spec/ruby/core/io/buffer/map_spec.rb create mode 100644 spec/ruby/core/io/buffer/string_spec.rb diff --git a/spec/ruby/README.md b/spec/ruby/README.md index 674ada4c9e4cc9..14a0068346fe3d 100644 --- a/spec/ruby/README.md +++ b/spec/ruby/README.md @@ -64,6 +64,7 @@ For older specs try these commits: * Ruby 2.7.8 - [Suite](https://github.com/ruby/spec/commit/93787e6035c925b593a9c0c6fb0e7e07a6f1df1f) using [MSpec](https://github.com/ruby/mspec/commit/1d8cf64722d8a7529f7cd205be5f16a89b7a67fd) * Ruby 3.0.7 - [Suite](https://github.com/ruby/spec/commit/affef93d9940f615e4836f64b011da211f570913) using [MSpec](https://github.com/ruby/mspec/commit/0aabb3e548eb5ea6cad0125f8f46cee34542b6b7) * Ruby 3.1.6 - [Suite](https://github.com/ruby/spec/commit/ec960f2389d1c2265d32397fa8afa6d462014efc) using [MSpec](https://github.com/ruby/mspec/commit/484310dbed35b84c74484fd674602f88c42d063a) +* Ruby 3.2.9 - 
[Suite](https://github.com/ruby/spec/commit/97f076242b7fc6e60703e6a6053365065cd6fc30) using [MSpec](https://github.com/ruby/mspec/commit/54704795e21128a930af2021c72c49cb87065134) ### Running the specs diff --git a/spec/ruby/command_line/dash_r_spec.rb b/spec/ruby/command_line/dash_r_spec.rb index 9f673c53dcc097..62b8dc001452a7 100644 --- a/spec/ruby/command_line/dash_r_spec.rb +++ b/spec/ruby/command_line/dash_r_spec.rb @@ -16,10 +16,7 @@ out = ruby_exe(fixture(__FILE__, "bad_syntax.rb"), options: "-r #{@test_file}", args: "2>&1", exit_status: 1) $?.should_not.success? out.should include("REQUIRED") - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + out.should include("SyntaxError") end it "does not require the file if the main script file does not exist" do diff --git a/spec/ruby/command_line/syntax_error_spec.rb b/spec/ruby/command_line/syntax_error_spec.rb index 9ba87b9e22795b..88864c048ebfee 100644 --- a/spec/ruby/command_line/syntax_error_spec.rb +++ b/spec/ruby/command_line/syntax_error_spec.rb @@ -3,17 +3,11 @@ describe "The interpreter" do it "prints an error when given a file with invalid syntax" do out = ruby_exe(fixture(__FILE__, "bad_syntax.rb"), args: "2>&1", exit_status: 1) - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + out.should.include?("SyntaxError") end it "prints an error when given code via -e with invalid syntax" do out = ruby_exe(nil, args: "-e 'a{' 2>&1", exit_status: 1) - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - out.should include_any_of("syntax error", "SyntaxError") + 
out.should.include?("SyntaxError") end end diff --git a/spec/ruby/core/array/fetch_spec.rb b/spec/ruby/core/array/fetch_spec.rb index b81c0b48d7296d..598b481ba46a11 100644 --- a/spec/ruby/core/array/fetch_spec.rb +++ b/spec/ruby/core/array/fetch_spec.rb @@ -12,9 +12,9 @@ end it "raises an IndexError if there is no element at index" do - -> { [1, 2, 3].fetch(3) }.should raise_error(IndexError) - -> { [1, 2, 3].fetch(-4) }.should raise_error(IndexError) - -> { [].fetch(0) }.should raise_error(IndexError) + -> { [1, 2, 3].fetch(3) }.should raise_error(IndexError, "index 3 outside of array bounds: -3...3") + -> { [1, 2, 3].fetch(-4) }.should raise_error(IndexError, "index -4 outside of array bounds: -3...3") + -> { [].fetch(0) }.should raise_error(IndexError, "index 0 outside of array bounds: 0...0") end it "returns default if there is no element at index if passed a default value" do @@ -50,6 +50,6 @@ def o.to_int(); 5; end end it "raises a TypeError when the passed argument can't be coerced to Integer" do - -> { [].fetch("cat") }.should raise_error(TypeError) + -> { [].fetch("cat") }.should raise_error(TypeError, "no implicit conversion of String into Integer") end end diff --git a/spec/ruby/core/array/pack/c_spec.rb b/spec/ruby/core/array/pack/c_spec.rb index 47b71b663d6a98..7a2b95def87f7f 100644 --- a/spec/ruby/core/array/pack/c_spec.rb +++ b/spec/ruby/core/array/pack/c_spec.rb @@ -45,20 +45,10 @@ [1, 2, 3, 4, 5].pack(pack_format('*')).should == "\x01\x02\x03\x04\x05" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack(pack_format("\000", 2)).should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 3].pack(pack_format("\000", 
2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/shared/basic.rb b/spec/ruby/core/array/pack/shared/basic.rb index a63f64d296a312..2ebd75f6c5ed79 100644 --- a/spec/ruby/core/array/pack/shared/basic.rb +++ b/spec/ruby/core/array/pack/shared/basic.rb @@ -32,22 +32,11 @@ [@obj, @obj, @obj, @obj].pack("aa #{pack_format} # some comment \n#{pack_format}").should be_an_instance_of(String) end - ruby_version_is ""..."3.3" do - it "warns that a directive is unknown" do - # additional directive ('a') is required for the X directive - -> { [@obj, @obj].pack("a K" + pack_format) }.should complain(/unknown pack directive 'K' in 'a K#{pack_format}'/) - -> { [@obj, @obj].pack("a 0" + pack_format) }.should complain(/unknown pack directive '0' in 'a 0#{pack_format}'/) - -> { [@obj, @obj].pack("a :" + pack_format) }.should complain(/unknown pack directive ':' in 'a :#{pack_format}'/) - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError when a directive is unknown" do - # additional directive ('a') is required for the X directive - -> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/) - -> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/) - -> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/) - end + it "raise ArgumentError when a directive is unknown" do + # additional directive ('a') is required for the X directive + -> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/) + -> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/) + -> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/) end it "calls #to_str to coerce the directives string" 
do diff --git a/spec/ruby/core/array/pack/shared/float.rb b/spec/ruby/core/array/pack/shared/float.rb index 76c800b74dc5f1..3f60fee2150b48 100644 --- a/spec/ruby/core/array/pack/shared/float.rb +++ b/spec/ruby/core/array/pack/shared/float.rb @@ -25,20 +25,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "\x9a\x999@33\xb3?33\x03A" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "\x9a\x99\xa9@33\x13A" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -105,20 +95,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "@9\x99\x9a?\xb333A\x0333" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "@\xa9\x99\x9aA\x1333" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -177,20 +157,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "333333\x07@ffffff\xf6?ffffff\x20@" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "333333\x15@ffffff\x22@" - end 
- end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -248,20 +218,10 @@ [2.9, 1.4, 8.2].pack(pack_format("*")).should == "@\x07333333?\xf6ffffff@\x20ffffff" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [5.3, 9.2].pack(pack_format("\000", 2)).should == "@\x15333333@\x22ffffff" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [5.3, 9.2].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [5.3, 9.2].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/shared/integer.rb b/spec/ruby/core/array/pack/shared/integer.rb index 61f7cca184a9b0..ff2ee492016cc4 100644 --- a/spec/ruby/core/array/pack/shared/integer.rb +++ b/spec/ruby/core/array/pack/shared/integer.rb @@ -41,21 +41,10 @@ str.should == "\x78\x65\xcd\xab\x21\x43" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x78\x65\xcd\xab" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between 
directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -105,21 +94,10 @@ str.should == "\x65\x78\xab\xcd\x43\x21" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x65\x78\xab\xcd" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -169,21 +147,10 @@ str.should == "\x78\x65\x43\x12\xcd\xab\xf0\xde\x21\x43\x65\x78" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - str.should == "\x78\x65\x43\x12\xcd\xab\xf0\xde" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -233,21 +200,10 @@ str.should == "\x12\x43\x65\x78\xde\xf0\xab\xcd\x78\x65\x43\x21" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - 
str.should == "\x12\x43\x65\x78\xde\xf0\xab\xcd" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0x1243_6578, 0xdef0_abcd].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -357,21 +313,10 @@ str.should == "\x56\x78\x12\x34\xcd\xab\xf0\xde\xf0\xde\xba\xdc\x21\x43\x65\x78" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - str.should == "\x56\x78\x12\x34\xcd\xab\xf0\xde\xf0\xde\xba\xdc\x21\x43\x65\x78" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do @@ -429,21 +374,10 @@ str.should == "\xde\xf0\xab\xcd\x34\x12\x78\x56\x78\x65\x43\x21\xdc\xba\xde\xf0" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - str = [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - str.should == "\xde\xf0\xab\xcd\x34\x12\x78\x56\x78\x65\x43\x21\xdc\xba\xde\xf0" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [0xdef0_abcd_3412_7856, 
0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [0xdef0_abcd_3412_7856, 0x7865_4321_dcba_def0].pack(pack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/shared/unicode.rb b/spec/ruby/core/array/pack/shared/unicode.rb index 4d8eaef3231067..0eccc7098c7cbf 100644 --- a/spec/ruby/core/array/pack/shared/unicode.rb +++ b/spec/ruby/core/array/pack/shared/unicode.rb @@ -67,20 +67,10 @@ -> { [obj].pack("U") }.should raise_error(TypeError) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack("U\x00U").should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack("U\x00U") - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 3].pack("U\x00U") + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/pack/w_spec.rb b/spec/ruby/core/array/pack/w_spec.rb index e770288d67b4d2..ebadb94cab0504 100644 --- a/spec/ruby/core/array/pack/w_spec.rb +++ b/spec/ruby/core/array/pack/w_spec.rb @@ -24,20 +24,10 @@ [obj].pack("w").should == "\x05" end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - [1, 2, 3].pack("w\x00w").should == "\x01\x02" - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - [1, 2, 3].pack("w\x00w") - }.should raise_error(ArgumentError, /unknown pack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + [1, 2, 
3].pack("w\x00w") + }.should raise_error(ArgumentError, /unknown pack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/array/rassoc_spec.rb b/spec/ruby/core/array/rassoc_spec.rb index 632a05e8b3778b..a7ffb75fb53521 100644 --- a/spec/ruby/core/array/rassoc_spec.rb +++ b/spec/ruby/core/array/rassoc_spec.rb @@ -36,17 +36,15 @@ def o.==(other); other == 'foobar'; end [[1, :foobar, o], [2, o, 1], [3, mock('foo')]].rassoc(key).should == [2, o, 1] end - ruby_version_is "3.3" do - it "calls to_ary on non-array elements" do - s1 = [1, 2] - s2 = ArraySpecs::ArrayConvertible.new(2, 3) - a = [s1, s2] - - s1.should_not_receive(:to_ary) - a.rassoc(2).should equal(s1) - - a.rassoc(3).should == [2, 3] - s2.called.should equal(:to_ary) - end + it "calls to_ary on non-array elements" do + s1 = [1, 2] + s2 = ArraySpecs::ArrayConvertible.new(2, 3) + a = [s1, s2] + + s1.should_not_receive(:to_ary) + a.rassoc(2).should equal(s1) + + a.rassoc(3).should == [2, 3] + s2.called.should equal(:to_ary) end end diff --git a/spec/ruby/core/array/sum_spec.rb b/spec/ruby/core/array/sum_spec.rb index 06abe061359faa..1886d692faaddc 100644 --- a/spec/ruby/core/array/sum_spec.rb +++ b/spec/ruby/core/array/sum_spec.rb @@ -74,13 +74,11 @@ [b].sum(a).should == 42 end - ruby_bug '#19530', ''...'3.3' do - it "calls + on the init value" do - a = mock("a") - b = mock("b") - a.should_receive(:+).with(42).and_return(b) - [42].sum(a).should == b - end + it "calls + on the init value" do + a = mock("a") + b = mock("b") + a.should_receive(:+).with(42).and_return(b) + [42].sum(a).should == b end end diff --git a/spec/ruby/core/basicobject/instance_eval_spec.rb b/spec/ruby/core/basicobject/instance_eval_spec.rb index 633b5c2cb1d9bb..f8d9d7505920d1 100644 --- a/spec/ruby/core/basicobject/instance_eval_spec.rb +++ b/spec/ruby/core/basicobject/instance_eval_spec.rb @@ -84,11 +84,9 @@ def foo end - ruby_version_is "3.3" do - it "uses the caller location as default location" do - f = 
Object.new - f.instance_eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default location" do + f = Object.new + f.instance_eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] end it "has access to receiver's instance variables" do diff --git a/spec/ruby/core/binding/eval_spec.rb b/spec/ruby/core/binding/eval_spec.rb index bb2036f73911c4..7852e1c93936b4 100644 --- a/spec/ruby/core/binding/eval_spec.rb +++ b/spec/ruby/core/binding/eval_spec.rb @@ -60,14 +60,6 @@ bind.eval("#foo\n__LINE__", "(test)", 88).should == 89 end - ruby_version_is ""..."3.3" do - it "uses (eval) as __FILE__ if single argument given" do - obj = BindingSpecs::Demo.new(1) - bind = obj.get_binding - bind.eval("__FILE__").should == '(eval)' - end - end - it "uses 1 as __LINE__" do obj = BindingSpecs::Demo.new(1) bind = obj.get_binding @@ -107,9 +99,7 @@ bind.eval("'bar'.foo").should == "foo" end - ruby_version_is "3.3" do - it "uses the caller location as default filename" do - binding.eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default filename" do + binding.eval("[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] end end diff --git a/spec/ruby/core/builtin_constants/builtin_constants_spec.rb b/spec/ruby/core/builtin_constants/builtin_constants_spec.rb index 13e066cc7f1664..2c71b416679749 100644 --- a/spec/ruby/core/builtin_constants/builtin_constants_spec.rb +++ b/spec/ruby/core/builtin_constants/builtin_constants_spec.rb @@ -87,65 +87,63 @@ end ruby_version_is "4.0" do - context "The constant" do - describe "Ruby" do - it "is a Module" do - Ruby.should.instance_of?(Module) - end + describe "Ruby" do + it "is a Module" do + Ruby.should.instance_of?(Module) end + end - describe "Ruby::VERSION" do - it "is equal to RUBY_VERSION" do - Ruby::VERSION.should equal(RUBY_VERSION) - end + describe 
"Ruby::VERSION" do + it "is equal to RUBY_VERSION" do + Ruby::VERSION.should equal(RUBY_VERSION) end + end - describe "RUBY::PATCHLEVEL" do - it "is equal to RUBY_PATCHLEVEL" do - Ruby::PATCHLEVEL.should equal(RUBY_PATCHLEVEL) - end + describe "RUBY::PATCHLEVEL" do + it "is equal to RUBY_PATCHLEVEL" do + Ruby::PATCHLEVEL.should equal(RUBY_PATCHLEVEL) end + end - describe "Ruby::COPYRIGHT" do - it "is equal to RUBY_COPYRIGHT" do - Ruby::COPYRIGHT.should equal(RUBY_COPYRIGHT) - end + describe "Ruby::COPYRIGHT" do + it "is equal to RUBY_COPYRIGHT" do + Ruby::COPYRIGHT.should equal(RUBY_COPYRIGHT) end + end - describe "Ruby::DESCRIPTION" do - it "is equal to RUBY_DESCRIPTION" do - Ruby::DESCRIPTION.should equal(RUBY_DESCRIPTION) - end + describe "Ruby::DESCRIPTION" do + it "is equal to RUBY_DESCRIPTION" do + Ruby::DESCRIPTION.should equal(RUBY_DESCRIPTION) end + end - describe "Ruby::ENGINE" do - it "is equal to RUBY_ENGINE" do - Ruby::ENGINE.should equal(RUBY_ENGINE) - end + describe "Ruby::ENGINE" do + it "is equal to RUBY_ENGINE" do + Ruby::ENGINE.should equal(RUBY_ENGINE) end + end - describe "Ruby::ENGINE_VERSION" do - it "is equal to RUBY_ENGINE_VERSION" do - Ruby::ENGINE_VERSION.should equal(RUBY_ENGINE_VERSION) - end + describe "Ruby::ENGINE_VERSION" do + it "is equal to RUBY_ENGINE_VERSION" do + Ruby::ENGINE_VERSION.should equal(RUBY_ENGINE_VERSION) end + end - describe "Ruby::PLATFORM" do - it "is equal to RUBY_PLATFORM" do - Ruby::PLATFORM.should equal(RUBY_PLATFORM) - end + describe "Ruby::PLATFORM" do + it "is equal to RUBY_PLATFORM" do + Ruby::PLATFORM.should equal(RUBY_PLATFORM) end + end - describe "Ruby::RELEASE_DATE" do - it "is equal to RUBY_RELEASE_DATE" do - Ruby::RELEASE_DATE.should equal(RUBY_RELEASE_DATE) - end + describe "Ruby::RELEASE_DATE" do + it "is equal to RUBY_RELEASE_DATE" do + Ruby::RELEASE_DATE.should equal(RUBY_RELEASE_DATE) end + end - describe "Ruby::REVISION" do - it "is equal to RUBY_REVISION" do - Ruby::REVISION.should 
equal(RUBY_REVISION) - end + describe "Ruby::REVISION" do + it "is equal to RUBY_REVISION" do + Ruby::REVISION.should equal(RUBY_REVISION) end end end diff --git a/spec/ruby/core/data/with_spec.rb b/spec/ruby/core/data/with_spec.rb index fd0a99d1fadaab..83cb97fa60777b 100644 --- a/spec/ruby/core/data/with_spec.rb +++ b/spec/ruby/core/data/with_spec.rb @@ -44,14 +44,12 @@ def subclass.new(*) data_copy.unit.should == "m" end - ruby_version_is "3.3" do - it "calls #initialize" do - data = DataSpecs::DataWithOverriddenInitialize.new(42, "m") - ScratchPad.clear + it "calls #initialize" do + data = DataSpecs::DataWithOverriddenInitialize.new(42, "m") + ScratchPad.clear - data.with(amount: 0) + data.with(amount: 0) - ScratchPad.recorded.should == [:initialize, [], {amount: 0, unit: "m"}] - end + ScratchPad.recorded.should == [:initialize, [], {amount: 0, unit: "m"}] end end diff --git a/spec/ruby/core/dir/chdir_spec.rb b/spec/ruby/core/dir/chdir_spec.rb index 015386a9026cf3..fd277e4e1d64fc 100644 --- a/spec/ruby/core/dir/chdir_spec.rb +++ b/spec/ruby/core/dir/chdir_spec.rb @@ -125,96 +125,94 @@ def to_str; DirSpecs.mock_dir; end end end -ruby_version_is '3.3' do - describe "Dir#chdir" do - before :all do - DirSpecs.create_mock_dirs - end +describe "Dir#chdir" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "changes the current working directory to self" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir - Dir.pwd.should == DirSpecs.mock_dir - ensure - dir.close - end + it "changes the current working directory to self" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir + Dir.pwd.should == DirSpecs.mock_dir + ensure + dir.close + end - it "changes the current working directory to self 
for duration of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - pwd_in_block = nil + it "changes the current working directory to self for duration of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + pwd_in_block = nil - dir.chdir { pwd_in_block = Dir.pwd } + dir.chdir { pwd_in_block = Dir.pwd } - pwd_in_block.should == DirSpecs.mock_dir - Dir.pwd.should == @original - ensure - dir.close - end + pwd_in_block.should == DirSpecs.mock_dir + Dir.pwd.should == @original + ensure + dir.close + end - it "returns 0 when successfully changing directory" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir.should == 0 - ensure - dir.close - end + it "returns 0 when successfully changing directory" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir.should == 0 + ensure + dir.close + end - it "returns the value of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - dir.chdir { :block_value }.should == :block_value - ensure - dir.close - end + it "returns the value of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + dir.chdir { :block_value }.should == :block_value + ensure + dir.close + end + + platform_is_not :windows do + it "does not raise an Errno::ENOENT if the original directory no longer exists" do + dir_name1 = tmp('testdir1') + dir_name2 = tmp('testdir2') + Dir.should_not.exist?(dir_name1) + Dir.should_not.exist?(dir_name2) + Dir.mkdir dir_name1 + Dir.mkdir dir_name2 - platform_is_not :windows do - it "does not raise an Errno::ENOENT if the original directory no longer exists" do - dir_name1 = tmp('testdir1') - dir_name2 = tmp('testdir2') - Dir.should_not.exist?(dir_name1) - Dir.should_not.exist?(dir_name2) - Dir.mkdir dir_name1 - Dir.mkdir dir_name2 - - dir2 = Dir.new(dir_name2) - - begin - Dir.chdir(dir_name1) do - dir2.chdir { Dir.unlink dir_name1 } - end - Dir.pwd.should == @original - ensure - Dir.unlink dir_name1 if Dir.exist?(dir_name1) - Dir.unlink dir_name2 if 
Dir.exist?(dir_name2) + dir2 = Dir.new(dir_name2) + + begin + Dir.chdir(dir_name1) do + dir2.chdir { Dir.unlink dir_name1 } end + Dir.pwd.should == @original ensure - dir2.close + Dir.unlink dir_name1 if Dir.exist?(dir_name1) + Dir.unlink dir_name2 if Dir.exist?(dir_name2) end + ensure + dir2.close end + end - it "always returns to the original directory when given a block" do - dir = Dir.new(DirSpecs.mock_dir) + it "always returns to the original directory when given a block" do + dir = Dir.new(DirSpecs.mock_dir) - begin - dir.chdir do - raise StandardError, "something bad happened" - end - rescue StandardError + begin + dir.chdir do + raise StandardError, "something bad happened" end - - Dir.pwd.should == @original - ensure - dir.close + rescue StandardError end + + Dir.pwd.should == @original + ensure + dir.close end end diff --git a/spec/ruby/core/dir/close_spec.rb b/spec/ruby/core/dir/close_spec.rb index f7cce318b8b17d..10ad1369c84d2f 100644 --- a/spec/ruby/core/dir/close_spec.rb +++ b/spec/ruby/core/dir/close_spec.rb @@ -24,7 +24,7 @@ dir.close.should == nil end - ruby_version_is '3.3'...'3.4' do + ruby_version_is ''...'3.4' do platform_is_not :windows do it "does not raise an error even if the file descriptor is closed with another Dir instance" do dir = Dir.open DirSpecs.mock_dir diff --git a/spec/ruby/core/dir/fchdir_spec.rb b/spec/ruby/core/dir/fchdir_spec.rb index 52600a95f2b80c..d5e77f7f03f372 100644 --- a/spec/ruby/core/dir/fchdir_spec.rb +++ b/spec/ruby/core/dir/fchdir_spec.rb @@ -1,73 +1,71 @@ require_relative '../../spec_helper' require_relative 'fixtures/common' -ruby_version_is '3.3' do - platform_is_not :windows do - describe "Dir.fchdir" do - before :all do - DirSpecs.create_mock_dirs - end +platform_is_not :windows do + describe "Dir.fchdir" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + 
before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "changes the current working directory to the directory specified by the integer file descriptor" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir dir.fileno - Dir.pwd.should == DirSpecs.mock_dir - ensure - dir.close - end + it "changes the current working directory to the directory specified by the integer file descriptor" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir dir.fileno + Dir.pwd.should == DirSpecs.mock_dir + ensure + dir.close + end - it "returns 0 when successfully changing directory" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno).should == 0 - ensure - dir.close - end + it "returns 0 when successfully changing directory" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno).should == 0 + ensure + dir.close + end - it "returns the value of the block when a block is given" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno) { :block_value }.should == :block_value - ensure - dir.close - end + it "returns the value of the block when a block is given" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno) { :block_value }.should == :block_value + ensure + dir.close + end - it "changes to the specified directory for the duration of the block" do - dir = Dir.new(DirSpecs.mock_dir) - Dir.fchdir(dir.fileno) { Dir.pwd }.should == DirSpecs.mock_dir - Dir.pwd.should == @original - ensure - dir.close - end + it "changes to the specified directory for the duration of the block" do + dir = Dir.new(DirSpecs.mock_dir) + Dir.fchdir(dir.fileno) { Dir.pwd }.should == DirSpecs.mock_dir + Dir.pwd.should == @original + ensure + dir.close + end - it "raises a SystemCallError if the file descriptor given is not valid" do - -> { Dir.fchdir(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") - -> { Dir.fchdir(-1) { } }.should raise_error(SystemCallError, "Bad file 
descriptor - fchdir") - end + it "raises a SystemCallError if the file descriptor given is not valid" do + -> { Dir.fchdir(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") + -> { Dir.fchdir(-1) { } }.should raise_error(SystemCallError, "Bad file descriptor - fchdir") + end - it "raises a SystemCallError if the file descriptor given is not for a directory" do - -> { Dir.fchdir $stdout.fileno }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) - -> { Dir.fchdir($stdout.fileno) { } }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) - end + it "raises a SystemCallError if the file descriptor given is not for a directory" do + -> { Dir.fchdir $stdout.fileno }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) + -> { Dir.fchdir($stdout.fileno) { } }.should raise_error(SystemCallError, /(Not a directory|Invalid argument) - fchdir/) end end +end - platform_is :windows do - describe "Dir.fchdir" do - it "raises NotImplementedError" do - -> { Dir.fchdir 1 }.should raise_error(NotImplementedError) - -> { Dir.fchdir(1) { } }.should raise_error(NotImplementedError) - end +platform_is :windows do + describe "Dir.fchdir" do + it "raises NotImplementedError" do + -> { Dir.fchdir 1 }.should raise_error(NotImplementedError) + -> { Dir.fchdir(1) { } }.should raise_error(NotImplementedError) end end end diff --git a/spec/ruby/core/dir/for_fd_spec.rb b/spec/ruby/core/dir/for_fd_spec.rb index ba467f2f86d928..1559e1baa43a6c 100644 --- a/spec/ruby/core/dir/for_fd_spec.rb +++ b/spec/ruby/core/dir/for_fd_spec.rb @@ -2,77 +2,75 @@ require_relative 'fixtures/common' quarantine! 
do # leads to "Errno::EBADF: Bad file descriptor - closedir" in DirSpecs.delete_mock_dirs -ruby_version_is '3.3' do - platform_is_not :windows do - describe "Dir.for_fd" do - before :all do - DirSpecs.create_mock_dirs - end +platform_is_not :windows do + describe "Dir.for_fd" do + before :all do + DirSpecs.create_mock_dirs + end - after :all do - DirSpecs.delete_mock_dirs - end + after :all do + DirSpecs.delete_mock_dirs + end - before :each do - @original = Dir.pwd - end + before :each do + @original = Dir.pwd + end - after :each do - Dir.chdir(@original) - end + after :each do + Dir.chdir(@original) + end - it "returns a new Dir object representing the directory specified by the given integer directory file descriptor" do - dir = Dir.new(DirSpecs.mock_dir) - dir_new = Dir.for_fd(dir.fileno) + it "returns a new Dir object representing the directory specified by the given integer directory file descriptor" do + dir = Dir.new(DirSpecs.mock_dir) + dir_new = Dir.for_fd(dir.fileno) - dir_new.should.instance_of?(Dir) - dir_new.children.should == dir.children - dir_new.fileno.should == dir.fileno - ensure - dir.close - end + dir_new.should.instance_of?(Dir) + dir_new.children.should == dir.children + dir_new.fileno.should == dir.fileno + ensure + dir.close + end - it "returns a new Dir object without associated path" do - dir = Dir.new(DirSpecs.mock_dir) - dir_new = Dir.for_fd(dir.fileno) + it "returns a new Dir object without associated path" do + dir = Dir.new(DirSpecs.mock_dir) + dir_new = Dir.for_fd(dir.fileno) - dir_new.path.should == nil - ensure - dir.close - end + dir_new.path.should == nil + ensure + dir.close + end - it "calls #to_int to convert a value to an Integer" do - dir = Dir.new(DirSpecs.mock_dir) - obj = mock("fd") - obj.should_receive(:to_int).and_return(dir.fileno) + it "calls #to_int to convert a value to an Integer" do + dir = Dir.new(DirSpecs.mock_dir) + obj = mock("fd") + obj.should_receive(:to_int).and_return(dir.fileno) - dir_new = 
Dir.for_fd(obj) - dir_new.fileno.should == dir.fileno - ensure - dir.close - end + dir_new = Dir.for_fd(obj) + dir_new.fileno.should == dir.fileno + ensure + dir.close + end - it "raises TypeError when value cannot be converted to Integer" do - -> { - Dir.for_fd(nil) - }.should raise_error(TypeError, "no implicit conversion from nil to integer") - end + it "raises TypeError when value cannot be converted to Integer" do + -> { + Dir.for_fd(nil) + }.should raise_error(TypeError, "no implicit conversion from nil to integer") + end - it "raises a SystemCallError if the file descriptor given is not valid" do - -> { Dir.for_fd(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fdopendir") - end + it "raises a SystemCallError if the file descriptor given is not valid" do + -> { Dir.for_fd(-1) }.should raise_error(SystemCallError, "Bad file descriptor - fdopendir") + end - it "raises a SystemCallError if the file descriptor given is not for a directory" do - -> { Dir.for_fd $stdout.fileno }.should raise_error(SystemCallError, "Not a directory - fdopendir") - end + it "raises a SystemCallError if the file descriptor given is not for a directory" do + -> { Dir.for_fd $stdout.fileno }.should raise_error(SystemCallError, "Not a directory - fdopendir") end end +end - platform_is :windows do - describe "Dir.for_fd" do - it "raises NotImplementedError" do - -> { Dir.for_fd 1 }.should raise_error(NotImplementedError) - end +platform_is :windows do + describe "Dir.for_fd" do + it "raises NotImplementedError" do + -> { Dir.for_fd 1 }.should raise_error(NotImplementedError) end end end diff --git a/spec/ruby/core/encoding/ascii_compatible_spec.rb b/spec/ruby/core/encoding/ascii_compatible_spec.rb index 4804300e855dff..bbcc6add9e4e1b 100644 --- a/spec/ruby/core/encoding/ascii_compatible_spec.rb +++ b/spec/ruby/core/encoding/ascii_compatible_spec.rb @@ -8,4 +8,15 @@ it "returns false if self does not represent an ASCII-compatible encoding" do 
Encoding::UTF_16LE.ascii_compatible?.should be_false end + + it "returns false for UTF_16 and UTF_32" do + Encoding::UTF_16.should_not.ascii_compatible? + Encoding::UTF_32.should_not.ascii_compatible? + end + + it "is always false for dummy encodings" do + Encoding.list.select(&:dummy?).each do |encoding| + encoding.should_not.ascii_compatible? + end + end end diff --git a/spec/ruby/core/encoding/dummy_spec.rb b/spec/ruby/core/encoding/dummy_spec.rb index 75ffcd5a4ec093..77caebca9a2871 100644 --- a/spec/ruby/core/encoding/dummy_spec.rb +++ b/spec/ruby/core/encoding/dummy_spec.rb @@ -11,4 +11,15 @@ Encoding::CP50221.dummy?.should be_true Encoding::UTF_7.dummy?.should be_true end + + it "returns true for UTF_16 and UTF_32" do + Encoding::UTF_16.should.dummy? + Encoding::UTF_32.should.dummy? + end + + it "implies not #ascii_compatible?" do + Encoding.list.select(&:dummy?).each do |encoding| + encoding.should_not.ascii_compatible? + end + end end diff --git a/spec/ruby/core/encoding/replicate_spec.rb b/spec/ruby/core/encoding/replicate_spec.rb index 2da998837f866b..9fe0ba87478bd0 100644 --- a/spec/ruby/core/encoding/replicate_spec.rb +++ b/spec/ruby/core/encoding/replicate_spec.rb @@ -2,87 +2,7 @@ require_relative '../../spec_helper' describe "Encoding#replicate" do - ruby_version_is ""..."3.3" do - before :all do - @i = 0 - end - - before :each do - @i += 1 - @prefix = "RS#{@i}" - end - - it "returns a replica of ASCII" do - name = @prefix + '-ASCII' - e = suppress_warning { Encoding::ASCII.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of UTF-8" do - name = @prefix + 'UTF-8' - e = suppress_warning { Encoding::UTF_8.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_true - 
"\u3042".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of UTF-16BE" do - name = @prefix + 'UTF-16-BE' - e = suppress_warning { Encoding::UTF_16BE.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - "a".dup.force_encoding(e).valid_encoding?.should be_false - "\x30\x42".dup.force_encoding(e).valid_encoding?.should be_true - "\x80".dup.force_encoding(e).valid_encoding?.should be_false - end - - it "returns a replica of ISO-2022-JP" do - name = @prefix + 'ISO-2022-JP' - e = suppress_warning { Encoding::ISO_2022_JP.replicate(name) } - Encoding.find(name).should == e - - e.name.should == name - e.dummy?.should be_true - end - - # NOTE: it's unclear of the value of this (for the complexity cost of it), - # but it is the current CRuby behavior. - it "can be associated with a String" do - name = @prefix + '-US-ASCII' - e = suppress_warning { Encoding::US_ASCII.replicate(name) } - e.name.should == name - Encoding.find(name).should == e - - s = "abc".dup.force_encoding(e) - s.encoding.should == e - s.encoding.name.should == name - end - end - - ruby_version_is ""..."3.3" do - it "warns about deprecation" do - -> { - Encoding::US_ASCII.replicate('MY-US-ASCII') - }.should complain(/warning: Encoding#replicate is deprecated and will be removed in Ruby 3.3; use the original encoding instead/) - end - - it "raises EncodingError if too many encodings" do - code = '1_000.times {|i| Encoding::US_ASCII.replicate("R_#{i}") }' - ruby_exe(code, args: "2>&1", exit_status: 1).should.include?('too many encoding (> 256) (EncodingError)') - end - end - - ruby_version_is "3.3" do - it "has been removed" do - Encoding::US_ASCII.should_not.respond_to?(:replicate, true) - end + it "has been removed" do + Encoding::US_ASCII.should_not.respond_to?(:replicate, true) end end diff --git a/spec/ruby/core/enumerator/each_spec.rb b/spec/ruby/core/enumerator/each_spec.rb index 
3af16e5587e466..8c9785cc85fe37 100644 --- a/spec/ruby/core/enumerator/each_spec.rb +++ b/spec/ruby/core/enumerator/each_spec.rb @@ -86,4 +86,19 @@ def object_each_with_arguments.each_with_arguments(arg, *args) ret.should be_an_instance_of(Enumerator) ret.should_not equal(@enum_with_arguments) end + + it "does not destructure yielded array values when chaining each.map" do + result = [[[1]]].each.map { |a, b| [a, b] } + result.should == [[[1], nil]] + end + + it "preserves array values yielded from the enumerator" do + result = [[1, 2]].each.map { |a| a } + result.should == [[1, 2]] + end + + it "allows destructuring to occur in the block, not the enumerator" do + result = [[1, 2]].each.map { |a, b| a } + result.should == [1] + end end diff --git a/spec/ruby/core/exception/no_method_error_spec.rb b/spec/ruby/core/exception/no_method_error_spec.rb index 772c569f67963e..d20878c6e3328d 100644 --- a/spec/ruby/core/exception/no_method_error_spec.rb +++ b/spec/ruby/core/exception/no_method_error_spec.rb @@ -66,204 +66,145 @@ end end - ruby_version_is ""..."3.3" do - it "calls #inspect when calling Exception#message" do - ScratchPad.record [] - test_class = Class.new do - def inspect - ScratchPad << :inspect_called - "" - end - end - instance = test_class.new - - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for :#$/ - ScratchPad.recorded.should == [:inspect_called] - end - end - - it "fallbacks to a simpler representation of the receiver when receiver.inspect raises an exception" do - test_class = Class.new do - def inspect - raise NoMethodErrorSpecs::InstanceException - end - end - instance = test_class.new - - begin - instance.bar - rescue NoMethodError => error - message = error.message - message.should =~ /undefined method.+\bbar\b/ - message.should include test_class.inspect - end - end - - it "uses #name to display the receiver if it is a class" do - klass = Class.new { def self.name; "MyClass"; end } - - 
begin - klass.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for MyClass:Class$/ - end + it "uses a literal name when receiver is nil" do + begin + nil.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for nil\Z/ end + end - it "uses #name to display the receiver if it is a module" do - mod = Module.new { def self.name; "MyModule"; end } - - begin - mod.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for MyModule:Module$/ - end + it "uses a literal name when receiver is true" do + begin + true.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for true\Z/ end end - ruby_version_is "3.3" do - it "uses a literal name when receiver is nil" do - begin - nil.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for nil\Z/ - end + it "uses a literal name when receiver is false" do + begin + false.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for false\Z/ end + end - it "uses a literal name when receiver is true" do - begin - true.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for true\Z/ - end - end + it "uses #name when receiver is a class" do + klass = Class.new { def self.name; "MyClass"; end } - it "uses a literal name when receiver is false" do - begin - false.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for false\Z/ - end + begin + klass.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class MyClass\Z/ end + end - it "uses #name when receiver is a class" do - klass = Class.new { def self.name; "MyClass"; end } + it "uses class' string representation when receiver is an anonymous class" do + klass = Class.new - begin - klass.foo - rescue NoMethodError => error - error.message.should =~ 
/\Aundefined method [`']foo' for class MyClass\Z/ - end + begin + klass.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class #\Z/ end + end - it "uses class' string representation when receiver is an anonymous class" do - klass = Class.new + it "uses class' string representation when receiver is a singleton class" do + obj = Object.new + singleton_class = obj.singleton_class - begin - klass.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for class #\Z/ - end + begin + singleton_class.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for class #>\Z/ end + end - it "uses class' string representation when receiver is a singleton class" do - obj = Object.new - singleton_class = obj.singleton_class + it "uses #name when receiver is a module" do + mod = Module.new { def self.name; "MyModule"; end } - begin - singleton_class.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for class #>\Z/ - end + begin + mod.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for module MyModule\Z/ end + end - it "uses #name when receiver is a module" do - mod = Module.new { def self.name; "MyModule"; end } + it "uses module's string representation when receiver is an anonymous module" do + m = Module.new - begin - mod.foo - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for module MyModule\Z/ - end + begin + m.foo + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for module #\Z/ end + end - it "uses module's string representation when receiver is an anonymous module" do - m = Module.new + it "uses class #name when receiver is an ordinary object" do + klass = Class.new { def self.name; "MyClass"; end } + instance = klass.new - begin - m.foo - rescue NoMethodError => error - error.message.should =~ 
/\Aundefined method [`']foo' for module #\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of MyClass\Z/ end + end - it "uses class #name when receiver is an ordinary object" do - klass = Class.new { def self.name; "MyClass"; end } - instance = klass.new + it "uses class string representation when receiver is an instance of anonymous class" do + klass = Class.new + instance = klass.new - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of MyClass\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ end + end - it "uses class string representation when receiver is an instance of anonymous class" do - klass = Class.new - instance = klass.new + it "uses class name when receiver has a singleton class" do + instance = NoMethodErrorSpecs::NoMethodErrorA.new + def instance.foo; end - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for #\Z/ end + end - it "uses class name when receiver has a singleton class" do - instance = NoMethodErrorSpecs::NoMethodErrorA.new - def instance.foo; end - - begin - instance.bar - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for #\Z/ + it "does not call #inspect when calling Exception#message" do + ScratchPad.record [] + test_class = Class.new do + def inspect + ScratchPad << :inspect_called + "" end end + instance = test_class.new - it "does not call #inspect when calling Exception#message" do - ScratchPad.record [] - test_class = Class.new do - def inspect - ScratchPad << :inspect_called - "" - end - end - instance = test_class.new - - begin - instance.bar - 
rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ - ScratchPad.recorded.should == [] - end + begin + instance.bar + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']bar' for an instance of #\Z/ + ScratchPad.recorded.should == [] end + end - it "does not truncate long class names" do - class_name = 'ExceptionSpecs::A' + 'a'*100 + it "does not truncate long class names" do + class_name = 'ExceptionSpecs::A' + 'a'*100 - begin - eval <<~RUBY - class #{class_name} - end + begin + eval <<~RUBY + class #{class_name} + end - obj = #{class_name}.new - obj.foo - RUBY - rescue NoMethodError => error - error.message.should =~ /\Aundefined method [`']foo' for an instance of #{class_name}\Z/ - end + obj = #{class_name}.new + obj.foo + RUBY + rescue NoMethodError => error + error.message.should =~ /\Aundefined method [`']foo' for an instance of #{class_name}\Z/ end end end diff --git a/spec/ruby/core/false/singleton_method_spec.rb b/spec/ruby/core/false/singleton_method_spec.rb index 738794b46c26f8..16dc85d67c64b7 100644 --- a/spec/ruby/core/false/singleton_method_spec.rb +++ b/spec/ruby/core/false/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "FalseClass#singleton_method" do - ruby_version_is '3.3' do - it "raises regardless of whether FalseClass defines the method" do + it "raises regardless of whether FalseClass defines the method" do + -> { false.singleton_method(:foo) }.should raise_error(NameError) + begin + def (false).foo; end -> { false.singleton_method(:foo) }.should raise_error(NameError) - begin - def (false).foo; end - -> { false.singleton_method(:foo) }.should raise_error(NameError) - ensure - FalseClass.send(:remove_method, :foo) - end + ensure + FalseClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/fiber/kill_spec.rb b/spec/ruby/core/fiber/kill_spec.rb index 2f4c499280f400..abf23ff17621fa 100644 --- 
a/spec/ruby/core/fiber/kill_spec.rb +++ b/spec/ruby/core/fiber/kill_spec.rb @@ -2,89 +2,87 @@ require_relative 'fixtures/classes' require_relative '../../shared/kernel/raise' -ruby_version_is "3.3" do - describe "Fiber#kill" do - it "kills a non-resumed fiber" do - fiber = Fiber.new{} +describe "Fiber#kill" do + it "kills a non-resumed fiber" do + fiber = Fiber.new{} - fiber.alive?.should == true + fiber.alive?.should == true - fiber.kill - fiber.alive?.should == false - end - - it "kills a resumed fiber" do - fiber = Fiber.new{while true; Fiber.yield; end} - fiber.resume - - fiber.alive?.should == true + fiber.kill + fiber.alive?.should == false + end - fiber.kill - fiber.alive?.should == false - end + it "kills a resumed fiber" do + fiber = Fiber.new{while true; Fiber.yield; end} + fiber.resume - it "can kill itself" do - fiber = Fiber.new do - Fiber.current.kill - end + fiber.alive?.should == true - fiber.alive?.should == true + fiber.kill + fiber.alive?.should == false + end - fiber.resume - fiber.alive?.should == false + it "can kill itself" do + fiber = Fiber.new do + Fiber.current.kill end - it "kills a resumed fiber from a child" do - parent = Fiber.new do - child = Fiber.new do - parent.kill - parent.alive?.should == true - end + fiber.alive?.should == true + + fiber.resume + fiber.alive?.should == false + end - child.resume + it "kills a resumed fiber from a child" do + parent = Fiber.new do + child = Fiber.new do + parent.kill + parent.alive?.should == true end - parent.resume - parent.alive?.should == false + child.resume end - it "executes the ensure block" do - ensure_executed = false + parent.resume + parent.alive?.should == false + end - fiber = Fiber.new do - while true; Fiber.yield; end - ensure - ensure_executed = true - end + it "executes the ensure block" do + ensure_executed = false - fiber.resume - fiber.kill - ensure_executed.should == true + fiber = Fiber.new do + while true; Fiber.yield; end + ensure + ensure_executed = true end - it "does 
not execute rescue block" do - rescue_executed = false + fiber.resume + fiber.kill + ensure_executed.should == true + end - fiber = Fiber.new do - while true; Fiber.yield; end - rescue Exception - rescue_executed = true - end + it "does not execute rescue block" do + rescue_executed = false - fiber.resume - fiber.kill - rescue_executed.should == false + fiber = Fiber.new do + while true; Fiber.yield; end + rescue Exception + rescue_executed = true end - it "repeatedly kills a fiber" do - fiber = Fiber.new do - while true; Fiber.yield; end - ensure - while true; Fiber.yield; end - end + fiber.resume + fiber.kill + rescue_executed.should == false + end - fiber.kill - fiber.alive?.should == false + it "repeatedly kills a fiber" do + fiber = Fiber.new do + while true; Fiber.yield; end + ensure + while true; Fiber.yield; end end + + fiber.kill + fiber.alive?.should == false end end diff --git a/spec/ruby/core/fiber/storage_spec.rb b/spec/ruby/core/fiber/storage_spec.rb index 015caaf3bbff48..6ffc13ee283bec 100644 --- a/spec/ruby/core/fiber/storage_spec.rb +++ b/spec/ruby/core/fiber/storage_spec.rb @@ -161,13 +161,11 @@ def key.to_str; "Foo"; end -> { Fiber[Object.new] = 44 }.should raise_error(TypeError) end - ruby_version_is "3.3" do - it "deletes the fiber storage key when assigning nil" do - Fiber.new(storage: {life: 42}) { - Fiber[:life] = nil - Fiber.current.storage - }.resume.should == {} - end + it "deletes the fiber storage key when assigning nil" do + Fiber.new(storage: {life: 42}) { + Fiber[:life] = nil + Fiber.current.storage + }.resume.should == {} end end diff --git a/spec/ruby/core/file/basename_spec.rb b/spec/ruby/core/file/basename_spec.rb index 87695ab97be3ca..66a5b56ed9a11a 100644 --- a/spec/ruby/core/file/basename_spec.rb +++ b/spec/ruby/core/file/basename_spec.rb @@ -162,11 +162,7 @@ it "rejects strings encoded with non ASCII-compatible encodings" do Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| - begin - path = 
"/foo/bar".encode(enc) - rescue Encoding::ConverterNotFoundError - next - end + path = "/foo/bar".encode(enc) -> { File.basename(path) diff --git a/spec/ruby/core/file/dirname_spec.rb b/spec/ruby/core/file/dirname_spec.rb index 8e6016ce6fef5b..1b006af7839f17 100644 --- a/spec/ruby/core/file/dirname_spec.rb +++ b/spec/ruby/core/file/dirname_spec.rb @@ -78,7 +78,33 @@ def object.to_int; 2; end File.dirname("foo/../").should == "foo" end + it "rejects strings encoded with non ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |enc| + path = "/foo/bar".encode(enc) + -> { + File.dirname(path) + }.should raise_error(Encoding::CompatibilityError) + end + end + + it "works with all ASCII-compatible encodings" do + Encoding.list.select(&:ascii_compatible?).each do |enc| + File.dirname("/foo/bar".encode(enc)).should == "/foo".encode(enc) + end + end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence" do + # dir/fileソname.txt + path = "dir/file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end + platform_is_not :windows do + it "ignores repeated leading / (edge cases on non-windows)" do + File.dirname("/////foo/bar/").should == "/foo" + end + it "returns all the components of filename except the last one (edge cases on non-windows)" do File.dirname('/////').should == '/' File.dirname("//foo//").should == "/" @@ -94,6 +120,13 @@ def object.to_int; 2; end File.dirname("//foo//").should == "//foo" File.dirname('/////').should == '//' end + + it "handles Shift JIS 0x5C (\\) as second byte of a multi-byte sequence (windows)" do + # dir\fileソname.txt + path = "dir\\file\x83\x5cname.txt".b.force_encoding(Encoding::SHIFT_JIS) + path.valid_encoding?.should be_true + File.dirname(path).should == "dir" + end end it "accepts an object that has a #to_path method" do diff --git a/spec/ruby/core/float/ceil_spec.rb 
b/spec/ruby/core/float/ceil_spec.rb index 75f56102922e82..5236a133f5de18 100644 --- a/spec/ruby/core/float/ceil_spec.rb +++ b/spec/ruby/core/float/ceil_spec.rb @@ -2,7 +2,7 @@ require_relative '../integer/shared/integer_ceil_precision' describe "Float#ceil" do - context "with precision" do + context "with values equal to integers" do it_behaves_like :integer_ceil_precision, :Float end @@ -20,7 +20,9 @@ 2.1679.ceil(0).should eql(3) 214.94.ceil(-1).should eql(220) 7.0.ceil(1).should eql(7.0) + 200.0.ceil(-2).should eql(200) -1.234.ceil(2).should eql(-1.23) 5.123812.ceil(4).should eql(5.1239) + 10.00001.ceil(5).should eql(10.00001) end end diff --git a/spec/ruby/core/float/floor_spec.rb b/spec/ruby/core/float/floor_spec.rb index 8b492ef4732fb7..1fafdadee9b6d7 100644 --- a/spec/ruby/core/float/floor_spec.rb +++ b/spec/ruby/core/float/floor_spec.rb @@ -2,7 +2,7 @@ require_relative '../integer/shared/integer_floor_precision' describe "Float#floor" do - context "with precision" do + context "with values equal to integers" do it_behaves_like :integer_floor_precision, :Float end @@ -20,7 +20,9 @@ 2.1679.floor(0).should eql(2) 214.94.floor(-1).should eql(210) 7.0.floor(1).should eql(7.0) + 200.0.floor(-2).should eql(200) -1.234.floor(2).should eql(-1.24) 5.123812.floor(4).should eql(5.1238) + 10.00001.floor(5).should eql(10.00001) end end diff --git a/spec/ruby/core/float/round_spec.rb b/spec/ruby/core/float/round_spec.rb index 7e8c792051b9d5..3e6575100bd52a 100644 --- a/spec/ruby/core/float/round_spec.rb +++ b/spec/ruby/core/float/round_spec.rb @@ -66,6 +66,7 @@ it "works for corner cases" do 42.0.round(308).should eql(42.0) 1.0e307.round(2).should eql(1.0e307) + 120.0.round(-1).should eql(120) end # redmine:5271 @@ -145,37 +146,35 @@ -4.809999999999999.round(5, half: :even).should eql(-4.81) end - ruby_bug "#19318", ""..."3.3" do - # These numbers are neighbouring floating point numbers round a - # precise value. 
They test that the rounding modes work correctly - # round that value and precision is not lost which might cause - # incorrect results. - it "does not lose precision during the rounding process" do - 767573.1875850001.round(5, half: nil).should eql(767573.18759) - 767573.1875850001.round(5, half: :up).should eql(767573.18759) - 767573.1875850001.round(5, half: :down).should eql(767573.18759) - 767573.1875850001.round(5, half: :even).should eql(767573.18759) - -767573.1875850001.round(5, half: nil).should eql(-767573.18759) - -767573.1875850001.round(5, half: :up).should eql(-767573.18759) - -767573.1875850001.round(5, half: :down).should eql(-767573.18759) - -767573.1875850001.round(5, half: :even).should eql(-767573.18759) - 767573.187585.round(5, half: nil).should eql(767573.18759) - 767573.187585.round(5, half: :up).should eql(767573.18759) - 767573.187585.round(5, half: :down).should eql(767573.18758) - 767573.187585.round(5, half: :even).should eql(767573.18758) - -767573.187585.round(5, half: nil).should eql(-767573.18759) - -767573.187585.round(5, half: :up).should eql(-767573.18759) - -767573.187585.round(5, half: :down).should eql(-767573.18758) - -767573.187585.round(5, half: :even).should eql(-767573.18758) - 767573.1875849998.round(5, half: nil).should eql(767573.18758) - 767573.1875849998.round(5, half: :up).should eql(767573.18758) - 767573.1875849998.round(5, half: :down).should eql(767573.18758) - 767573.1875849998.round(5, half: :even).should eql(767573.18758) - -767573.1875849998.round(5, half: nil).should eql(-767573.18758) - -767573.1875849998.round(5, half: :up).should eql(-767573.18758) - -767573.1875849998.round(5, half: :down).should eql(-767573.18758) - -767573.1875849998.round(5, half: :even).should eql(-767573.18758) - end + # These numbers are neighbouring floating point numbers round a + # precise value. 
They test that the rounding modes work correctly + # round that value and precision is not lost which might cause + # incorrect results. + it "does not lose precision during the rounding process" do + 767573.1875850001.round(5, half: nil).should eql(767573.18759) + 767573.1875850001.round(5, half: :up).should eql(767573.18759) + 767573.1875850001.round(5, half: :down).should eql(767573.18759) + 767573.1875850001.round(5, half: :even).should eql(767573.18759) + -767573.1875850001.round(5, half: nil).should eql(-767573.18759) + -767573.1875850001.round(5, half: :up).should eql(-767573.18759) + -767573.1875850001.round(5, half: :down).should eql(-767573.18759) + -767573.1875850001.round(5, half: :even).should eql(-767573.18759) + 767573.187585.round(5, half: nil).should eql(767573.18759) + 767573.187585.round(5, half: :up).should eql(767573.18759) + 767573.187585.round(5, half: :down).should eql(767573.18758) + 767573.187585.round(5, half: :even).should eql(767573.18758) + -767573.187585.round(5, half: nil).should eql(-767573.18759) + -767573.187585.round(5, half: :up).should eql(-767573.18759) + -767573.187585.round(5, half: :down).should eql(-767573.18758) + -767573.187585.round(5, half: :even).should eql(-767573.18758) + 767573.1875849998.round(5, half: nil).should eql(767573.18758) + 767573.1875849998.round(5, half: :up).should eql(767573.18758) + 767573.1875849998.round(5, half: :down).should eql(767573.18758) + 767573.1875849998.round(5, half: :even).should eql(767573.18758) + -767573.1875849998.round(5, half: nil).should eql(-767573.18758) + -767573.1875849998.round(5, half: :up).should eql(-767573.18758) + -767573.1875849998.round(5, half: :down).should eql(-767573.18758) + -767573.1875849998.round(5, half: :even).should eql(-767573.18758) end it "raises FloatDomainError for exceptional values with a half option" do @@ -197,7 +196,13 @@ it "returns 0 for 0 or undefined ndigits" do (0.0).round.should == 0 (-0.0).round(0).should == 0 - (0.0).round(half: :up) == 
0 + (0.0).round(half: :up).should == 0 + end + + it "returns 0 for negative ndigits" do + (0.0).round(-1).should == 0 + (-0.0).round(-1).should == 0 + (0.0).round(-1, half: :up).should == 0 end end end diff --git a/spec/ruby/core/gc/config_spec.rb b/spec/ruby/core/gc/config_spec.rb index e20e8e4a16a97f..db452b0907f58b 100644 --- a/spec/ruby/core/gc/config_spec.rb +++ b/spec/ruby/core/gc/config_spec.rb @@ -40,6 +40,20 @@ GC.config.should == previous end + ruby_version_is ""..."4.0" do + it "returns the same as GC.config but without the :implementation key" do + previous = GC.config + GC.config({}).should == previous.except(:implementation) + end + end + + ruby_version_is "4.0" do + it "returns the same as GC.config, including the :implementation key" do + previous = GC.config + GC.config({}).should == previous + end + end + it "raises an ArgumentError if options include global keys" do -> { GC.config(implementation: "default") }.should raise_error(ArgumentError, 'Attempting to set read-only key "Implementation"') end diff --git a/spec/ruby/core/hash/compact_spec.rb b/spec/ruby/core/hash/compact_spec.rb index 13371bce434fc9..48f8bb7cae166c 100644 --- a/spec/ruby/core/hash/compact_spec.rb +++ b/spec/ruby/core/hash/compact_spec.rb @@ -19,28 +19,26 @@ @hash.should == @initial_pairs end - ruby_version_is '3.3' do - it "retains the default value" do - hash = Hash.new(1) - hash.compact.default.should == 1 - hash[:a] = 1 - hash.compact.default.should == 1 - end + it "retains the default value" do + hash = Hash.new(1) + hash.compact.default.should == 1 + hash[:a] = 1 + hash.compact.default.should == 1 + end - it "retains the default_proc" do - pr = proc { |h, k| h[k] = [] } - hash = Hash.new(&pr) - hash.compact.default_proc.should == pr - hash[:a] = 1 - hash.compact.default_proc.should == pr - end + it "retains the default_proc" do + pr = proc { |h, k| h[k] = [] } + hash = Hash.new(&pr) + hash.compact.default_proc.should == pr + hash[:a] = 1 + 
hash.compact.default_proc.should == pr + end - it "retains compare_by_identity flag" do - hash = {}.compare_by_identity - hash.compact.compare_by_identity?.should == true - hash[:a] = 1 - hash.compact.compare_by_identity?.should == true - end + it "retains compare_by_identity flag" do + hash = {}.compare_by_identity + hash.compact.compare_by_identity?.should == true + hash[:a] = 1 + hash.compact.compare_by_identity?.should == true end end diff --git a/spec/ruby/core/hash/constructor_spec.rb b/spec/ruby/core/hash/constructor_spec.rb index 0f97f7b40e9c2c..301f8675ce27be 100644 --- a/spec/ruby/core/hash/constructor_spec.rb +++ b/spec/ruby/core/hash/constructor_spec.rb @@ -44,23 +44,23 @@ it "raises for elements that are not arrays" do -> { - Hash[[:a]].should == {} - }.should raise_error(ArgumentError) + Hash[[:a]] + }.should raise_error(ArgumentError, "wrong element type Symbol at 0 (expected array)") -> { - Hash[[:nil]].should == {} - }.should raise_error(ArgumentError) + Hash[[nil]] + }.should raise_error(ArgumentError, "wrong element type nil at 0 (expected array)") end it "raises an ArgumentError for arrays of more than 2 elements" do - ->{ Hash[[[:a, :b, :c]]].should == {} }.should raise_error(ArgumentError) + ->{ + Hash[[[:a, :b, :c]]] + }.should raise_error(ArgumentError, "invalid number of elements (3 for 1..2)") end it "raises an ArgumentError when passed a list of value-invalid-pairs in an array" do -> { - -> { - Hash[[[:a, 1], [:b], 42, [:d, 2], [:e, 2, 3], []]] - }.should complain(/ignoring wrong elements/) - }.should raise_error(ArgumentError) + Hash[[[:a, 1], [:b], 42, [:d, 2], [:e, 2, 3], []]] + }.should raise_error(ArgumentError, "wrong element type Integer at 2 (expected array)") end describe "passed a single argument which responds to #to_hash" do @@ -117,13 +117,11 @@ def obj.to_hash() { 1 => 2, 3 => 4 } end Hash[hash].default_proc.should be_nil end - ruby_version_is '3.3' do - it "does not retain compare_by_identity flag" do - hash = { a: 1 
}.compare_by_identity - Hash[hash].compare_by_identity?.should == false + it "does not retain compare_by_identity flag" do + hash = { a: 1 }.compare_by_identity + Hash[hash].compare_by_identity?.should == false - hash = {}.compare_by_identity - Hash[hash].compare_by_identity?.should == false - end + hash = {}.compare_by_identity + Hash[hash].compare_by_identity?.should == false end end diff --git a/spec/ruby/core/hash/new_spec.rb b/spec/ruby/core/hash/new_spec.rb index 5ae3e1f98d6205..8de44ec9411deb 100644 --- a/spec/ruby/core/hash/new_spec.rb +++ b/spec/ruby/core/hash/new_spec.rb @@ -34,7 +34,7 @@ -> { Hash.new(nil) { 0 } }.should raise_error(ArgumentError) end - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "emits a deprecation warning if keyword arguments are passed" do -> { Hash.new(unknown: true) }.should complain( Regexp.new(Regexp.escape("Calling Hash.new with keyword arguments is deprecated and will be removed in Ruby 3.4; use Hash.new({ key: value }) instead")) diff --git a/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb b/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb index 7dbb9c0a98351d..ddf9038800005f 100644 --- a/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb +++ b/spec/ruby/core/hash/ruby2_keywords_hash_spec.rb @@ -72,12 +72,10 @@ Hash.ruby2_keywords_hash(hash).default_proc.should == pr end - ruby_version_is '3.3' do - it "retains compare_by_identity_flag" do - hash = {}.compare_by_identity - Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true - hash[:a] = 1 - Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true - end + it "retains compare_by_identity_flag" do + hash = {}.compare_by_identity + Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true + hash[:a] = 1 + Hash.ruby2_keywords_hash(hash).compare_by_identity?.should == true end end diff --git a/spec/ruby/core/hash/shared/to_s.rb b/spec/ruby/core/hash/shared/to_s.rb index e116b8878b9699..38dd2c44360a95 100644 --- 
a/spec/ruby/core/hash/shared/to_s.rb +++ b/spec/ruby/core/hash/shared/to_s.rb @@ -89,5 +89,36 @@ it "adds quotes to symbol keys that are not valid symbol literals" do { "needs-quotes": 1 }.send(@method).should == '{"needs-quotes": 1}' end + + it "can be evaled" do + no_quote = '{a: 1, a!: 1, a?: 1}' + eval(no_quote).inspect.should == no_quote + [ + '{"": 1}', + '{"0": 1, "!": 1, "%": 1, "&": 1, "*": 1, "+": 1, "-": 1, "/": 1, "<": 1, ">": 1, "^": 1, "`": 1, "|": 1, "~": 1}', + '{"@a": 1, "$a": 1, "+@": 1, "a=": 1, "[]": 1}', + '{"a\"b": 1, "@@a": 1, "<=>": 1, "===": 1, "[]=": 1}', + ].each do |quote| + eval(quote).inspect.should == quote + end + end + + it "can be evaled when Encoding.default_external is changed" do + external = Encoding.default_external + + Encoding.default_external = Encoding::ASCII + utf8_ascii_hash = '{"\\u3042": 1}' + eval(utf8_ascii_hash).inspect.should == utf8_ascii_hash + + Encoding.default_external = Encoding::UTF_8 + utf8_hash = "{\u3042: 1}" + eval(utf8_hash).inspect.should == utf8_hash + + Encoding.default_external = Encoding::Windows_31J + sjis_hash = "{\x87]: 1}".dup.force_encoding('sjis') + eval(sjis_hash).inspect.should == sjis_hash + ensure + Encoding.default_external = external + end end end diff --git a/spec/ruby/core/integer/ceil_spec.rb b/spec/ruby/core/integer/ceil_spec.rb index eb633fba78432f..395be58fbd3f48 100644 --- a/spec/ruby/core/integer/ceil_spec.rb +++ b/spec/ruby/core/integer/ceil_spec.rb @@ -10,15 +10,4 @@ context "with precision" do it_behaves_like :integer_ceil_precision, :Integer end - - context "precision argument specified as part of the ceil method is negative" do - it "returns the smallest integer greater than self with at least precision.abs trailing zeros" do - 18.ceil(-1).should eql(20) - 18.ceil(-2).should eql(100) - 18.ceil(-3).should eql(1000) - -1832.ceil(-1).should eql(-1830) - -1832.ceil(-2).should eql(-1800) - -1832.ceil(-3).should eql(-1000) - end - end end diff --git 
a/spec/ruby/core/integer/shared/integer_ceil_precision.rb b/spec/ruby/core/integer/shared/integer_ceil_precision.rb index 9f31c2cf615ed0..b23c17937faf0c 100644 --- a/spec/ruby/core/integer/shared/integer_ceil_precision.rb +++ b/spec/ruby/core/integer/shared/integer_ceil_precision.rb @@ -1,6 +1,6 @@ describe :integer_ceil_precision, shared: true do context "precision is zero" do - it "returns integer self" do + it "returns Integer equal to self" do send(@method, 0).ceil(0).should.eql?(0) send(@method, 123).ceil(0).should.eql?(123) send(@method, -123).ceil(0).should.eql?(-123) @@ -23,7 +23,16 @@ send(@method, 0).ceil(-10).should.eql?(0) end - it "returns largest integer less than self with at least precision.abs trailing zeros" do + it "returns Integer equal to self if there are already at least precision.abs trailing zeros" do + send(@method, 10).ceil(-1).should.eql?(10) + send(@method, 100).ceil(-1).should.eql?(100) + send(@method, 100).ceil(-2).should.eql?(100) + send(@method, -10).ceil(-1).should.eql?(-10) + send(@method, -100).ceil(-1).should.eql?(-100) + send(@method, -100).ceil(-2).should.eql?(-100) + end + + it "returns smallest Integer greater than self with at least precision.abs trailing zeros" do send(@method, 123).ceil(-1).should.eql?(130) send(@method, 123).ceil(-2).should.eql?(200) send(@method, 123).ceil(-3).should.eql?(1000) @@ -31,13 +40,15 @@ send(@method, -123).ceil(-1).should.eql?(-120) send(@method, -123).ceil(-2).should.eql?(-100) send(@method, -123).ceil(-3).should.eql?(0) + + send(@method, 100).ceil(-3).should.eql?(1000) + send(@method, -100).ceil(-3).should.eql?(0) end - ruby_bug "#20654", ""..."3.4" do - it "returns 10**precision.abs when precision.abs is larger than the number digits of self" do - send(@method, 123).ceil(-20).should.eql?(100000000000000000000) - send(@method, 123).ceil(-50).should.eql?(100000000000000000000000000000000000000000000000000) - end + # Bug #20654 + it "returns 10**precision.abs when precision.abs has more 
digits than self" do + send(@method, 123).ceil(-20).should.eql?(100000000000000000000) + send(@method, 123).ceil(-50).should.eql?(100000000000000000000000000000000000000000000000000) end end end diff --git a/spec/ruby/core/integer/shared/integer_floor_precision.rb b/spec/ruby/core/integer/shared/integer_floor_precision.rb index 4c5888c6c4818d..6247907d4cd0b8 100644 --- a/spec/ruby/core/integer/shared/integer_floor_precision.rb +++ b/spec/ruby/core/integer/shared/integer_floor_precision.rb @@ -33,11 +33,10 @@ send(@method, -123).floor(-3).should.eql?(-1000) end - ruby_bug "#20654", ""..."3.4" do - it "returns -(10**precision.abs) when self is negative and precision.abs is larger than the number digits of self" do - send(@method, -123).floor(-20).should.eql?(-100000000000000000000) - send(@method, -123).floor(-50).should.eql?(-100000000000000000000000000000000000000000000000000) - end + # Bug #20654 + it "returns -(10**precision.abs) when self is negative and precision.abs is larger than the number digits of self" do + send(@method, -123).floor(-20).should.eql?(-100000000000000000000) + send(@method, -123).floor(-50).should.eql?(-100000000000000000000000000000000000000000000000000) end end end diff --git a/spec/ruby/core/io/binread_spec.rb b/spec/ruby/core/io/binread_spec.rb index 9e36b84da97350..e4576c1aa1e4f3 100644 --- a/spec/ruby/core/io/binread_spec.rb +++ b/spec/ruby/core/io/binread_spec.rb @@ -45,7 +45,7 @@ -> { IO.binread @fname, 0, -1 }.should raise_error(Errno::EINVAL) end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do # https://bugs.ruby-lang.org/issues/19630 it "warns about deprecation given a path with a pipe" do cmd = "|echo ok" diff --git a/spec/ruby/core/io/buffer/empty_spec.rb b/spec/ruby/core/io/buffer/empty_spec.rb index e1fd4ab6a23268..788b23f88f0a48 100644 --- a/spec/ruby/core/io/buffer/empty_spec.rb +++ b/spec/ruby/core/io/buffer/empty_spec.rb @@ -14,11 +14,9 @@ @buffer.empty?.should be_true end - ruby_version_is "3.3" do - 
it "is true for a 0-length String-backed buffer created with .string" do - IO::Buffer.string(0) do |buffer| - buffer.empty?.should be_true - end + it "is true for a 0-length String-backed buffer created with .string" do + IO::Buffer.string(0) do |buffer| + buffer.empty?.should be_true end end diff --git a/spec/ruby/core/io/buffer/external_spec.rb b/spec/ruby/core/io/buffer/external_spec.rb index 4377a383578167..10bb51053d422d 100644 --- a/spec/ruby/core/io/buffer/external_spec.rb +++ b/spec/ruby/core/io/buffer/external_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.external?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.external?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.external?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.external?.should be_false - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is true for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.external?.should be_true - end - - it "is true for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.external?.should be_true - end - end + it "is true for a buffer with externally-managed memory" do + @buffer = IO::Buffer.for("string") + @buffer.external?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is true" do - IO::Buffer.string(4) do |buffer| - 
buffer.external?.should be_true - end - end - end + it "is false for a buffer with self-managed memory" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.external?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.external?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.external?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.external?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.external?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.external?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.external?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.external?.should be_false end end diff --git a/spec/ruby/core/io/buffer/for_spec.rb b/spec/ruby/core/io/buffer/for_spec.rb new file mode 100644 index 00000000000000..d59a2a033afb3b --- /dev/null +++ b/spec/ruby/core/io/buffer/for_spec.rb @@ -0,0 +1,94 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.for" do + before :each do + @string = +"för striñg" + end + + after :each do + @buffer&.free + @buffer = nil + end + + context "without a block" do + it "copies string's contents, creating a separate read-only buffer" do + @buffer = IO::Buffer.for(@string) + + 
@buffer.size.should == @string.bytesize + @buffer.get_string.should == @string.b + + @string[0] = "d" + @buffer.get_string(0, 1).should == "f".b + + -> { @buffer.set_string("d") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + + it "creates an external, read-only buffer" do + @buffer = IO::Buffer.for(@string) + + @buffer.should_not.internal? + @buffer.should_not.mapped? + @buffer.should.external? + + @buffer.should_not.empty? + @buffer.should_not.null? + + @buffer.should_not.shared? + @buffer.should_not.private? + @buffer.should.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? + end + end + + context "with a block" do + it "returns the last value in the block" do + value = + IO::Buffer.for(@string) do |buffer| + buffer.size * 3 + end + value.should == @string.bytesize * 3 + end + + it "frees the buffer at the end of the block" do + IO::Buffer.for(@string) do |buffer| + @buffer = buffer + @buffer.should_not.null? + end + @buffer.should.null? + end + + context "if string is not frozen" do + it "creates a modifiable string-backed buffer" do + IO::Buffer.for(@string) do |buffer| + buffer.size.should == @string.bytesize + buffer.get_string.should == @string.b + + buffer.should_not.readonly? + + buffer.set_string("ghost shell") + @string.should == "ghost shellg" + end + end + + it "locks the original string to prevent modification" do + IO::Buffer.for(@string) do |_buffer| + -> { @string[0] = "t" }.should raise_error(RuntimeError, "can't modify string; temporarily locked") + end + @string[1] = "u" + @string.should == "fur striñg" + end + end + + context "if string is frozen" do + it "creates a read-only string-backed buffer" do + IO::Buffer.for(@string.freeze) do |buffer| + buffer.should.readonly? 
+ + -> { buffer.set_string("ghost shell") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + end + end + end +end diff --git a/spec/ruby/core/io/buffer/free_spec.rb b/spec/ruby/core/io/buffer/free_spec.rb index f3a491897849ae..9a141e11f6b728 100644 --- a/spec/ruby/core/io/buffer/free_spec.rb +++ b/spec/ruby/core/io/buffer/free_spec.rb @@ -49,17 +49,15 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "disassociates the buffer from the string and nullifies the buffer" do - string = - IO::Buffer.string(4) do |buffer| - buffer.set_string("meat") - buffer.free - buffer.null?.should be_true - end - string.should == "meat" - end + context "with a String-backed buffer created with .string" do + it "disassociates the buffer from the string and nullifies the buffer" do + string = + IO::Buffer.string(4) do |buffer| + buffer.set_string("meat") + buffer.free + buffer.null?.should be_true + end + string.should == "meat" end end diff --git a/spec/ruby/core/io/buffer/initialize_spec.rb b/spec/ruby/core/io/buffer/initialize_spec.rb index c86d1e7f1d634a..90b501f53d9a92 100644 --- a/spec/ruby/core/io/buffer/initialize_spec.rb +++ b/spec/ruby/core/io/buffer/initialize_spec.rb @@ -14,14 +14,18 @@ it "creates a buffer with default state" do @buffer = IO::Buffer.new + + @buffer.should_not.external? + @buffer.should_not.shared? + @buffer.should_not.private? @buffer.should_not.readonly? @buffer.should_not.empty? @buffer.should_not.null? - # This is run-time state, set by #locked. @buffer.should_not.locked? + @buffer.should.valid? end context "with size argument" do @@ -29,25 +33,24 @@ size = IO::Buffer::PAGE_SIZE - 1 @buffer = IO::Buffer.new(size) @buffer.size.should == size + @buffer.should_not.empty? + @buffer.should.internal? @buffer.should_not.mapped? - @buffer.should_not.empty? 
end it "creates a new mapped buffer if size is greater than or equal to IO::Buffer::PAGE_SIZE" do size = IO::Buffer::PAGE_SIZE @buffer = IO::Buffer.new(size) @buffer.size.should == size + @buffer.should_not.empty? + @buffer.should_not.internal? @buffer.should.mapped? - @buffer.should_not.empty? end it "creates a null buffer if size is 0" do @buffer = IO::Buffer.new(0) - @buffer.size.should.zero? - @buffer.should_not.internal? - @buffer.should_not.mapped? @buffer.should.null? @buffer.should.empty? end @@ -77,27 +80,40 @@ @buffer.should_not.empty? end + it "allows extra flags" do + @buffer = IO::Buffer.new(10, IO::Buffer::INTERNAL | IO::Buffer::SHARED | IO::Buffer::READONLY) + @buffer.should.internal? + @buffer.should.shared? + @buffer.should.readonly? + end + + it "ignores flags if size is 0" do + @buffer = IO::Buffer.new(0, 0xffff) + @buffer.should.null? + @buffer.should.empty? + + @buffer.should_not.internal? + @buffer.should_not.mapped? + @buffer.should_not.external? + + @buffer.should_not.shared? + @buffer.should_not.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? 
+ end + it "raises IO::Buffer::AllocationError if neither IO::Buffer::MAPPED nor IO::Buffer::INTERNAL is given" do -> { IO::Buffer.new(10, IO::Buffer::READONLY) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") -> { IO::Buffer.new(10, 0) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") end - ruby_version_is "3.3" do - it "raises ArgumentError if flags is negative" do - -> { IO::Buffer.new(10, -1) }.should raise_error(ArgumentError, "Flags can't be negative!") - end - end - - ruby_version_is ""..."3.3" do - it "raises IO::Buffer::AllocationError with non-Integer flags" do - -> { IO::Buffer.new(10, 0.0) }.should raise_error(IO::Buffer::AllocationError, "Could not allocate buffer!") - end + it "raises ArgumentError if flags is negative" do + -> { IO::Buffer.new(10, -1) }.should raise_error(ArgumentError, "Flags can't be negative!") end - ruby_version_is "3.3" do - it "raises TypeError with non-Integer flags" do - -> { IO::Buffer.new(10, 0.0) }.should raise_error(TypeError, "not an Integer") - end + it "raises TypeError with non-Integer flags" do + -> { IO::Buffer.new(10, 0.0) }.should raise_error(TypeError, "not an Integer") end end end diff --git a/spec/ruby/core/io/buffer/internal_spec.rb b/spec/ruby/core/io/buffer/internal_spec.rb index 409699cc3c9230..40dc633d5d7fd0 100644 --- a/spec/ruby/core/io/buffer/internal_spec.rb +++ b/spec/ruby/core/io/buffer/internal_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is true for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.internal?.should be_true - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.internal?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - 
@buffer.internal?.should be_false - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.internal?.should be_false - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.internal?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.internal?.should be_false - end - end + it "is true for an internally-allocated buffer" do + @buffer = IO::Buffer.new(12) + @buffer.internal?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.internal?.should be_false - end - end - end + it "is false for an externally-allocated buffer" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.internal?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.internal?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.internal?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.internal?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.internal?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - 
buffer.slice.internal?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.internal?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.internal?.should be_false end end diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb new file mode 100644 index 00000000000000..d980eb0ae0451a --- /dev/null +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -0,0 +1,357 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.map" do + before :all do + @big_file_name = tmp("big_file") + # Usually 4 kibibytes + 16 bytes + File.write(@big_file_name, "12345678" * (IO::Buffer::PAGE_SIZE / 8 + 2)) + end + + after :all do + File.delete(@big_file_name) + end + + def open_fixture + File.open("#{__dir__}/../fixtures/read_text.txt", "r+") + end + + def open_big_file_fixture + File.open(@big_file_name, "r+") + end + + after :each do + @buffer&.free + @buffer = nil + @file&.close + @file = nil + end + + it "creates a new buffer mapped from a file" do + @file = open_fixture + @buffer = IO::Buffer.map(@file) + + @buffer.size.should == 9 + @buffer.get_string.should == "abcâdef\n".b + end + + it "allows to close the file after creating buffer, retaining mapping" do + file = open_fixture + @buffer = IO::Buffer.map(file) + file.close + + @buffer.get_string.should == "abcâdef\n".b + end + + it "creates a mapped, external, shared buffer" do + @file = open_fixture + @buffer = IO::Buffer.map(@file) + + @buffer.should_not.internal? + @buffer.should.mapped? + @buffer.should.external? + + @buffer.should_not.empty? + @buffer.should_not.null? + + @buffer.should.shared? + @buffer.should_not.private? + @buffer.should_not.readonly? + + @buffer.should_not.locked? + @buffer.should.valid? 
+ end + + platform_is_not :windows do + it "is shareable across processes" do + file_name = tmp("shared_buffer") + @file = File.open(file_name, "w+") + @file << "I'm private" + @file.rewind + @buffer = IO::Buffer.map(@file) + + IO.popen("-") do |child_pipe| + if child_pipe + # Synchronize on child's output. + child_pipe.readlines.first.chomp.should == @buffer.to_s + @buffer.get_string.should == "I'm shared!" + + @file.read.should == "I'm shared!" + else + @buffer.set_string("I'm shared!") + puts @buffer + end + ensure + child_pipe&.close + end + ensure + File.unlink(file_name) + end + end + + context "with an empty file" do + ruby_version_is ""..."4.0" do + it "raises a SystemCallError" do + @file = File.open("#{__dir__}/../fixtures/empty.txt", "r+") + -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) + end + end + + ruby_version_is "4.0" do + it "raises ArgumentError" do + @file = File.open("#{__dir__}/../fixtures/empty.txt", "r+") + -> { IO::Buffer.map(@file) }.should raise_error(ArgumentError, "Invalid negative or zero file size!") + end + end + end + + context "with a file opened only for reading" do + it "raises a SystemCallError if no flags are used" do + @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) + end + end + + context "with size argument" do + it "limits the buffer to the specified size in bytes, starting from the start of the file" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, 4) + + @buffer.size.should == 4 + @buffer.get_string.should == "abc\xC3".b + end + + it "maps the whole file if size is nil" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil) + + @buffer.size.should == 9 + end + + context "if size is 0" do + ruby_version_is ""..."4.0" do + platform_is_not :windows do + it "raises a SystemCallError" do + @file = open_fixture + -> { IO::Buffer.map(@file, 0) }.should raise_error(SystemCallError) + end + end + end + + 
ruby_version_is "4.0" do + it "raises ArgumentError" do + @file = open_fixture + -> { IO::Buffer.map(@file, 0) }.should raise_error(ArgumentError, "Size can't be zero!") + end + end + end + + it "raises TypeError if size is not an Integer or nil" do + @file = open_fixture + -> { IO::Buffer.map(@file, "10") }.should raise_error(TypeError, "not an Integer") + -> { IO::Buffer.map(@file, 10.0) }.should raise_error(TypeError, "not an Integer") + end + + it "raises ArgumentError if size is negative" do + @file = open_fixture + -> { IO::Buffer.map(@file, -1) }.should raise_error(ArgumentError, "Size can't be negative!") + end + + ruby_version_is ""..."4.0" do + # May or may not cause a crash on access. + it "is undefined behavior if size is larger than file size" + end + + ruby_version_is "4.0" do + it "raises ArgumentError if size is larger than file size" do + @file = open_fixture + -> { IO::Buffer.map(@file, 8192) }.should raise_error(ArgumentError, "Size can't be larger than file size!") + end + end + end + + context "with size and offset arguments" do + # Neither Windows nor macOS have clear, stable behavior with non-zero offset. 
+ # https://bugs.ruby-lang.org/issues/21700 + platform_is :linux do + context "if offset is an allowed value for system call" do + it "maps the span specified by size starting from the offset" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, 14, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == 14 + @buffer.get_string(0, 14).should == "12345678123456" + end + + context "if size is nil" do + ruby_version_is ""..."4.0" do + it "maps the rest of the file" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.get_string(0, 1).should == "1" + end + + it "incorrectly sets buffer's size to file's full size" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == @file.size + end + end + + ruby_version_is "4.0" do + it "maps the rest of the file" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.get_string(0, 1).should == "1" + end + + it "sets buffer's size to file's remaining size" do + @file = open_big_file_fixture + @buffer = IO::Buffer.map(@file, nil, IO::Buffer::PAGE_SIZE) + + @buffer.size.should == (@file.size - IO::Buffer::PAGE_SIZE) + end + end + end + end + end + + it "maps the file from the start if offset is 0" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, 4, 0) + + @buffer.size.should == 4 + @buffer.get_string.should == "abc\xC3".b + end + + ruby_version_is ""..."4.0" do + # May or may not cause a crash on access. + it "is undefined behavior if offset+size is larger than file size" + end + + ruby_version_is "4.0" do + it "raises ArgumentError if offset+size is larger than file size" do + @file = open_big_file_fixture + -> { IO::Buffer.map(@file, 17, IO::Buffer::PAGE_SIZE) }.should raise_error(ArgumentError, "Offset too large!") + ensure + # Windows requires the file to be closed before deletion. + @file.close unless @file.closed? 
+ end + end + + it "raises TypeError if offset is not convertible to Integer" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, "4096") }.should raise_error(TypeError, /no implicit conversion/) + -> { IO::Buffer.map(@file, 4, nil) }.should raise_error(TypeError, /no implicit conversion/) + end + + it "raises a SystemCallError if offset is not an allowed value" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, 3) }.should raise_error(SystemCallError) + end + + ruby_version_is ""..."4.0" do + it "raises a SystemCallError if offset is negative" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, -1) }.should raise_error(SystemCallError) + end + end + + ruby_version_is "4.0" do + it "raises ArgumentError if offset is negative" do + @file = open_fixture + -> { IO::Buffer.map(@file, 4, -1) }.should raise_error(ArgumentError, "Offset can't be negative!") + end + end + end + + context "with flags argument" do + context "when READONLY flag is specified" do + it "sets readonly flag on the buffer, allowing only reads" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + @buffer.should.readonly? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + end + + it "allows mapping read-only files" do + @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + @buffer.should.readonly? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + end + + it "causes IO::Buffer::AccessError on write" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) + + -> { @buffer.set_string("test") }.should raise_error(IO::Buffer::AccessError, "Buffer is not writable!") + end + end + + context "when PRIVATE is specified" do + it "sets private flag on the buffer, making it freely modifiable" do + @file = open_fixture + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + @buffer.should.private? 
+ @buffer.should_not.shared? + @buffer.should_not.external? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + @buffer.set_string("test12345") + @buffer.get_string.should == "test12345".b + + @file.read.should == "abcâdef\n" + end + + it "allows mapping read-only files and modifying the buffer" do + @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + @buffer.should.private? + @buffer.should_not.shared? + @buffer.should_not.external? + + @buffer.get_string.should == "abc\xC3\xA2def\n".b + @buffer.set_string("test12345") + @buffer.get_string.should == "test12345".b + + @file.read.should == "abcâdef\n" + end + + platform_is_not :windows do + it "is not shared across processes" do + file_name = tmp("shared_buffer") + @file = File.open(file_name, "w+") + @file << "I'm private" + @file.rewind + @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) + + IO.popen("-") do |child_pipe| + if child_pipe + # Synchronize on child's output. 
+ child_pipe.readlines.first.chomp.should == @buffer.to_s + @buffer.get_string.should == "I'm private" + + @file.read.should == "I'm private" + else + @buffer.set_string("I'm shared!") + puts @buffer + end + ensure + child_pipe&.close + end + ensure + File.unlink(file_name) + end + end + end + end +end diff --git a/spec/ruby/core/io/buffer/mapped_spec.rb b/spec/ruby/core/io/buffer/mapped_spec.rb index b3610207ffb100..13dc548ed26e72 100644 --- a/spec/ruby/core/io/buffer/mapped_spec.rb +++ b/spec/ruby/core/io/buffer/mapped_spec.rb @@ -6,103 +6,18 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.mapped?.should be_false - end - - it "is true for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.mapped?.should be_true - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.mapped?.should be_true - end - end - - ruby_version_is "3.3" do - it "is true for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.mapped?.should be_true - end - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.mapped?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.mapped?.should be_false - end - end + it "is true for a buffer with mapped memory" do + @buffer = IO::Buffer.new(12, IO::Buffer::MAPPED) + @buffer.mapped?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.mapped?.should 
be_false - end - end - end + it "is false for a buffer with non-mapped memory" do + @buffer = IO::Buffer.for("string") + @buffer.mapped?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.mapped?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.mapped?.should be_false - end - end - - context "created with .map" do - it "is false" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.mapped?.should be_false - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.mapped?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.mapped?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.mapped?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.mapped?.should be_false end end diff --git a/spec/ruby/core/io/buffer/null_spec.rb b/spec/ruby/core/io/buffer/null_spec.rb index 3fb1144d0ed66f..3a0e7f841bf94d 100644 --- a/spec/ruby/core/io/buffer/null_spec.rb +++ b/spec/ruby/core/io/buffer/null_spec.rb @@ -14,11 +14,9 @@ @buffer.null?.should be_false end - ruby_version_is "3.3" do - it "is false for a 0-length String-backed buffer created with .string" do - IO::Buffer.string(0) do |buffer| - buffer.null?.should be_false - end + it "is false for a 0-length String-backed buffer created with .string" do + IO::Buffer.string(0) do |buffer| + buffer.null?.should be_false end end diff --git 
a/spec/ruby/core/io/buffer/private_spec.rb b/spec/ruby/core/io/buffer/private_spec.rb index 7aa308997b1939..86b7a7a0d0b391 100644 --- a/spec/ruby/core/io/buffer/private_spec.rb +++ b/spec/ruby/core/io/buffer/private_spec.rb @@ -1,111 +1,23 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "IO::Buffer#private?" do - after :each do - @buffer&.free - @buffer = nil - end - - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::INTERNAL) - @buffer.private?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.private?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.private?.should be_false - end - end - - it "is true for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.private?.should be_true - end - end - end - - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.private?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.private?.should be_false - end - end - end - - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.private?.should be_false - end - end - end - - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.private?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = 
IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.private?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a regular file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.private?.should be_false - end - end - - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.slice.private?.should be_false - end - end - end +describe "IO::Buffer#private?" do + after :each do + @buffer&.free + @buffer = nil + end - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.private?.should be_false - end + it "is true for a buffer created with PRIVATE flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::PRIVATE) + @buffer.private?.should be_true + end - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.private?.should be_false - end - end - end + it "is false for a buffer created without PRIVATE flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL) + @buffer.private?.should be_false + end - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.private?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.private?.should be_false end end diff --git a/spec/ruby/core/io/buffer/readonly_spec.rb b/spec/ruby/core/io/buffer/readonly_spec.rb index 0014a876ed743e..2fc7d340b77b80 100644 --- a/spec/ruby/core/io/buffer/readonly_spec.rb +++ b/spec/ruby/core/io/buffer/readonly_spec.rb @@ -6,138 +6,23 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, 
IO::Buffer::INTERNAL) - @buffer.readonly?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.readonly?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is false for a writable mapping" do - File.open(__FILE__, "r+") do |file| - @buffer = IO::Buffer.map(file) - @buffer.readonly?.should be_false - end - end - - it "is true for a readonly mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.readonly?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.readonly?.should be_false - end - end - end + it "is true for a buffer created with READONLY flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::READONLY) + @buffer.readonly?.should be_true end - context "with a String-backed buffer created with .for" do - it "is true for a buffer created without a block" do - @buffer = IO::Buffer.for(+"test") - @buffer.readonly?.should be_true - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.readonly?.should be_false - end - end - - it "is true for a buffer created with a block from a frozen string" do - IO::Buffer.for(-"test") do |buffer| - buffer.readonly?.should be_true - end - end + it "is true for a buffer that is non-writable" do + @buffer = IO::Buffer.for("string") + @buffer.readonly?.should be_true end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.readonly?.should be_false - end - end - end + it "is false for a modifiable buffer" do + @buffer = IO::Buffer.new(12) + @buffer.readonly?.should be_false end - # This seems to be the only flag propagated from the 
source buffer to the slice. - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.readonly?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.readonly?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a read-write file-backed buffer" do - File.open(__FILE__, "r+") do |file| - @buffer = IO::Buffer.map(file) - @buffer.slice.readonly?.should be_false - end - end - - it "is true when slicing a readonly file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.readonly?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.slice.readonly?.should be_false - end - end - end - end - - context "created with .for" do - it "is true when slicing a buffer created without a block" do - @buffer = IO::Buffer.for(+"test") - @buffer.slice.readonly?.should be_true - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.readonly?.should be_false - end - end - - it "is true when slicing a buffer created with a block from a frozen string" do - IO::Buffer.for(-"test") do |buffer| - buffer.slice.readonly?.should be_true - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.readonly?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.readonly?.should be_false end end diff --git a/spec/ruby/core/io/buffer/resize_spec.rb b/spec/ruby/core/io/buffer/resize_spec.rb index 
0da3a23356c0dc..a5e80439dac653 100644 --- a/spec/ruby/core/io/buffer/resize_spec.rb +++ b/spec/ruby/core/io/buffer/resize_spec.rb @@ -44,17 +44,15 @@ end end - ruby_version_is "3.3" do - it "resizes private buffer, discarding excess contents" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) - @buffer.resize(10) - @buffer.size.should == 10 - @buffer.get_string.should == "require_re" - @buffer.resize(12) - @buffer.size.should == 12 - @buffer.get_string.should == "require_re\0\0" - end + it "resizes private buffer, discarding excess contents" do + File.open(__FILE__, "r") do |file| + @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::PRIVATE) + @buffer.resize(10) + @buffer.size.should == 10 + @buffer.get_string.should == "require_re" + @buffer.resize(12) + @buffer.size.should == 12 + @buffer.get_string.should == "require_re\0\0" end end end @@ -76,12 +74,10 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "disallows resizing, raising IO::Buffer::AccessError" do - IO::Buffer.string(4) do |buffer| - -> { buffer.resize(10) }.should raise_error(IO::Buffer::AccessError, "Cannot resize external buffer!") - end + context "with a String-backed buffer created with .string" do + it "disallows resizing, raising IO::Buffer::AccessError" do + IO::Buffer.string(4) do |buffer| + -> { buffer.resize(10) }.should raise_error(IO::Buffer::AccessError, "Cannot resize external buffer!") end end end diff --git a/spec/ruby/core/io/buffer/shared/null_and_empty.rb b/spec/ruby/core/io/buffer/shared/null_and_empty.rb index c8fe9e5e46ca9b..2ff5cf8f410db7 100644 --- a/spec/ruby/core/io/buffer/shared/null_and_empty.rb +++ b/spec/ruby/core/io/buffer/shared/null_and_empty.rb @@ -21,11 +21,9 @@ @buffer.send(@method).should be_false end - ruby_version_is "3.3" do - it "is false for a non-empty String-backed buffer created with .string" do - IO::Buffer.string(4) do |buffer| - 
buffer.send(@method).should be_false - end + it "is false for a non-empty String-backed buffer created with .string" do + IO::Buffer.string(4) do |buffer| + buffer.send(@method).should be_false end end diff --git a/spec/ruby/core/io/buffer/shared_spec.rb b/spec/ruby/core/io/buffer/shared_spec.rb index f2a638cf39f9b1..4f3bce5448fee0 100644 --- a/spec/ruby/core/io/buffer/shared_spec.rb +++ b/spec/ruby/core/io/buffer/shared_spec.rb @@ -6,112 +6,25 @@ @buffer = nil end - context "with a buffer created with .new" do - it "is false for an internal buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::INTERNAL) - @buffer.shared?.should be_false - end - - it "is false for a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.shared?.should be_false - end - end - - context "with a file-backed buffer created with .map" do - it "is true for a regular mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.shared?.should be_true - end - end - - ruby_version_is "3.3" do - it "is false for a private mapping" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.shared?.should be_false - end - end - end + it "is true for a buffer created with SHARED flag" do + @buffer = IO::Buffer.new(12, IO::Buffer::INTERNAL | IO::Buffer::SHARED) + @buffer.shared?.should be_true end - context "with a String-backed buffer created with .for" do - it "is false for a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.shared?.should be_false - end - - it "is false for a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.shared?.should be_false - end - end + it "is true for a non-private buffer created with .map" do + file = File.open("#{__dir__}/../fixtures/read_text.txt", "r+") + @buffer = IO::Buffer.map(file) + file.close + @buffer.shared?.should be_true end - 
ruby_version_is "3.3" do - context "with a String-backed buffer created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.shared?.should be_false - end - end - end + it "is false for an unshared buffer" do + @buffer = IO::Buffer.new(12) + @buffer.shared?.should be_false end - # Always false for slices - context "with a slice of a buffer" do - context "created with .new" do - it "is false when slicing an internal buffer" do - @buffer = IO::Buffer.new(4) - @buffer.slice.shared?.should be_false - end - - it "is false when slicing a mapped buffer" do - @buffer = IO::Buffer.new(4, IO::Buffer::MAPPED) - @buffer.slice.shared?.should be_false - end - end - - context "created with .map" do - it "is false when slicing a regular file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY) - @buffer.slice.shared?.should be_false - end - end - - ruby_version_is "3.3" do - it "is false when slicing a private file-backed buffer" do - File.open(__FILE__, "r") do |file| - @buffer = IO::Buffer.map(file, nil, 0, IO::Buffer::READONLY | IO::Buffer::PRIVATE) - @buffer.slice.shared?.should be_false - end - end - end - end - - context "created with .for" do - it "is false when slicing a buffer created without a block" do - @buffer = IO::Buffer.for("test") - @buffer.slice.shared?.should be_false - end - - it "is false when slicing a buffer created with a block" do - IO::Buffer.for(+"test") do |buffer| - buffer.slice.shared?.should be_false - end - end - end - - ruby_version_is "3.3" do - context "created with .string" do - it "is false" do - IO::Buffer.string(4) do |buffer| - buffer.slice.shared?.should be_false - end - end - end - end + it "is false for a null buffer" do + @buffer = IO::Buffer.new(0) + @buffer.shared?.should be_false end end diff --git a/spec/ruby/core/io/buffer/string_spec.rb b/spec/ruby/core/io/buffer/string_spec.rb new file mode 100644 index 00000000000000..bc7a73075e3948 --- 
/dev/null +++ b/spec/ruby/core/io/buffer/string_spec.rb @@ -0,0 +1,62 @@ +require_relative '../../../spec_helper' + +describe "IO::Buffer.string" do + it "creates a modifiable buffer for the duration of the block" do + IO::Buffer.string(7) do |buffer| + @buffer = buffer + + buffer.size.should == 7 + buffer.get_string.should == "\0\0\0\0\0\0\0".b + + buffer.set_string("test") + buffer.get_string.should == "test\0\0\0" + end + @buffer.should.null? + end + + it "returns contents of the buffer as a binary string" do + string = + IO::Buffer.string(7) do |buffer| + buffer.set_string("ä test") + end + string.should == "\xC3\xA4 test".b + end + + it "creates an external buffer" do + IO::Buffer.string(8) do |buffer| + buffer.should_not.internal? + buffer.should_not.mapped? + buffer.should.external? + + buffer.should_not.empty? + buffer.should_not.null? + + buffer.should_not.shared? + buffer.should_not.private? + buffer.should_not.readonly? + + buffer.should_not.locked? + buffer.should.valid? + end + end + + it "returns an empty string if size is 0" do + string = + IO::Buffer.string(0) do |buffer| + buffer.size.should == 0 + end + string.should == "" + end + + it "raises ArgumentError if size is negative" do + -> { IO::Buffer.string(-1) {} }.should raise_error(ArgumentError, "negative string size (or size too big)") + end + + it "raises RangeError if size is too large" do + -> { IO::Buffer.string(2 ** 232) {} }.should raise_error(RangeError, /\Abignum too big to convert into [`']long'\z/) + end + + it "raises LocalJumpError if no block is given" do + -> { IO::Buffer.string(7) }.should raise_error(LocalJumpError, "no block given") + end +end diff --git a/spec/ruby/core/io/buffer/transfer_spec.rb b/spec/ruby/core/io/buffer/transfer_spec.rb index cb8c843ff24750..5b7b63e3339991 100644 --- a/spec/ruby/core/io/buffer/transfer_spec.rb +++ b/spec/ruby/core/io/buffer/transfer_spec.rb @@ -60,17 +60,15 @@ end end - ruby_version_is "3.3" do - context "with a String-backed buffer created 
with .string" do - it "transfers memory to a new buffer, breaking the transaction by nullifying the original" do - IO::Buffer.string(4) do |buffer| - info = buffer.to_s - @buffer = buffer.transfer - @buffer.to_s.should == info - buffer.null?.should be_true - end - @buffer.null?.should be_false + context "with a String-backed buffer created with .string" do + it "transfers memory to a new buffer, breaking the transaction by nullifying the original" do + IO::Buffer.string(4) do |buffer| + info = buffer.to_s + @buffer = buffer.transfer + @buffer.to_s.should == info + buffer.null?.should be_true end + @buffer.null?.should be_false end end diff --git a/spec/ruby/core/io/foreach_spec.rb b/spec/ruby/core/io/foreach_spec.rb index 6abe8901bac7a0..28d6fef7ae5079 100644 --- a/spec/ruby/core/io/foreach_spec.rb +++ b/spec/ruby/core/io/foreach_spec.rb @@ -47,14 +47,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with a pipe" do - cmd = "|echo ok" - -> { - IO.foreach(cmd).to_a - }.should complain(/IO process creation with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + IO.foreach(cmd).to_a + }.should complain(/IO process creation with a leading '\|'/) end end end diff --git a/spec/ruby/core/io/gets_spec.rb b/spec/ruby/core/io/gets_spec.rb index ca64bf860e4148..0587fa07c43289 100644 --- a/spec/ruby/core/io/gets_spec.rb +++ b/spec/ruby/core/io/gets_spec.rb @@ -338,23 +338,11 @@ @io.gets.encoding.should == Encoding::BINARY end - ruby_version_is ''...'3.3' do - it "transcodes to internal encoding if the IO object's external encoding is BINARY" do - Encoding.default_external = Encoding::BINARY - Encoding.default_internal = Encoding::UTF_8 - @io = new_io @name, 'r' - @io.set_encoding Encoding::BINARY, Encoding::UTF_8 - @io.gets.encoding.should == Encoding::UTF_8 - end - end - - ruby_version_is '3.3' do 
- it "ignores the internal encoding if the IO object's external encoding is BINARY" do - Encoding.default_external = Encoding::BINARY - Encoding.default_internal = Encoding::UTF_8 - @io = new_io @name, 'r' - @io.set_encoding Encoding::BINARY, Encoding::UTF_8 - @io.gets.encoding.should == Encoding::BINARY - end + it "ignores the internal encoding if the IO object's external encoding is BINARY" do + Encoding.default_external = Encoding::BINARY + Encoding.default_internal = Encoding::UTF_8 + @io = new_io @name, 'r' + @io.set_encoding Encoding::BINARY, Encoding::UTF_8 + @io.gets.encoding.should == Encoding::BINARY end end diff --git a/spec/ruby/core/io/pread_spec.rb b/spec/ruby/core/io/pread_spec.rb index dc7bcedf3e5c2f..8f7d9b2521d9c9 100644 --- a/spec/ruby/core/io/pread_spec.rb +++ b/spec/ruby/core/io/pread_spec.rb @@ -1,140 +1,138 @@ # -*- encoding: utf-8 -*- require_relative '../../spec_helper' -guard -> { platform_is_not :windows or ruby_version_is "3.3" } do - describe "IO#pread" do - before :each do - @fname = tmp("io_pread.txt") - @contents = "1234567890" - touch(@fname) { |f| f.write @contents } - @file = File.open(@fname, "r+") - end - - after :each do - @file.close - rm_r @fname - end +describe "IO#pread" do + before :each do + @fname = tmp("io_pread.txt") + @contents = "1234567890" + touch(@fname) { |f| f.write @contents } + @file = File.open(@fname, "r+") + end - it "accepts a length, and an offset" do - @file.pread(4, 0).should == "1234" - @file.pread(3, 4).should == "567" - end + after :each do + @file.close + rm_r @fname + end - it "accepts a length, an offset, and an output buffer" do - buffer = +"foo" - @file.pread(3, 4, buffer).should.equal?(buffer) - buffer.should == "567" - end + it "accepts a length, and an offset" do + @file.pread(4, 0).should == "1234" + @file.pread(3, 4).should == "567" + end - it "shrinks the buffer in case of less bytes read" do - buffer = +"foo" - @file.pread(1, 0, buffer) - buffer.should == "1" - end + it "accepts a length, 
an offset, and an output buffer" do + buffer = +"foo" + @file.pread(3, 4, buffer).should.equal?(buffer) + buffer.should == "567" + end - it "grows the buffer in case of more bytes read" do - buffer = +"foo" - @file.pread(5, 0, buffer) - buffer.should == "12345" - end + it "shrinks the buffer in case of less bytes read" do + buffer = +"foo" + @file.pread(1, 0, buffer) + buffer.should == "1" + end - it "preserves the encoding of the given buffer" do - buffer = ''.encode(Encoding::ISO_8859_1) - @file.pread(10, 0, buffer) + it "grows the buffer in case of more bytes read" do + buffer = +"foo" + @file.pread(5, 0, buffer) + buffer.should == "12345" + end - buffer.encoding.should == Encoding::ISO_8859_1 - end + it "preserves the encoding of the given buffer" do + buffer = ''.encode(Encoding::ISO_8859_1) + @file.pread(10, 0, buffer) - it "does not advance the file pointer" do - @file.pread(4, 0).should == "1234" - @file.read.should == "1234567890" - end + buffer.encoding.should == Encoding::ISO_8859_1 + end - it "ignores the current offset" do - @file.pos = 3 - @file.pread(4, 0).should == "1234" - end + it "does not advance the file pointer" do + @file.pread(4, 0).should == "1234" + @file.read.should == "1234567890" + end - it "returns an empty string for maxlen = 0" do - @file.pread(0, 4).should == "" - end + it "ignores the current offset" do + @file.pos = 3 + @file.pread(4, 0).should == "1234" + end - it "returns a buffer for maxlen = 0 when buffer specified" do - buffer = +"foo" - @file.pread(0, 4, buffer).should.equal?(buffer) - buffer.should == "foo" - end + it "returns an empty string for maxlen = 0" do + @file.pread(0, 4).should == "" + end - it "ignores the offset for maxlen = 0, even if it is out of file bounds" do - @file.pread(0, 400).should == "" - end + it "returns a buffer for maxlen = 0 when buffer specified" do + buffer = +"foo" + @file.pread(0, 4, buffer).should.equal?(buffer) + buffer.should == "foo" + end - it "does not reset the buffer when reading 
with maxlen = 0" do - buffer = +"foo" - @file.pread(0, 4, buffer) - buffer.should == "foo" + it "ignores the offset for maxlen = 0, even if it is out of file bounds" do + @file.pread(0, 400).should == "" + end - @file.pread(0, 400, buffer) - buffer.should == "foo" - end + it "does not reset the buffer when reading with maxlen = 0" do + buffer = +"foo" + @file.pread(0, 4, buffer) + buffer.should == "foo" - it "converts maxlen to Integer using #to_int" do - maxlen = mock('maxlen') - maxlen.should_receive(:to_int).and_return(4) - @file.pread(maxlen, 0).should == "1234" - end + @file.pread(0, 400, buffer) + buffer.should == "foo" + end - it "converts offset to Integer using #to_int" do - offset = mock('offset') - offset.should_receive(:to_int).and_return(0) - @file.pread(4, offset).should == "1234" - end + it "converts maxlen to Integer using #to_int" do + maxlen = mock('maxlen') + maxlen.should_receive(:to_int).and_return(4) + @file.pread(maxlen, 0).should == "1234" + end - it "converts a buffer to String using to_str" do - buffer = mock('buffer') - buffer.should_receive(:to_str).at_least(1).and_return(+"foo") - @file.pread(4, 0, buffer) - buffer.should_not.is_a?(String) - buffer.to_str.should == "1234" - end + it "converts offset to Integer using #to_int" do + offset = mock('offset') + offset.should_receive(:to_int).and_return(0) + @file.pread(4, offset).should == "1234" + end - it "raises TypeError if maxlen is not an Integer and cannot be coerced into Integer" do - maxlen = Object.new - -> { @file.pread(maxlen, 0) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') - end + it "converts a buffer to String using to_str" do + buffer = mock('buffer') + buffer.should_receive(:to_str).at_least(1).and_return(+"foo") + @file.pread(4, 0, buffer) + buffer.should_not.is_a?(String) + buffer.to_str.should == "1234" + end - it "raises TypeError if offset is not an Integer and cannot be coerced into Integer" do - offset = Object.new - -> { 
@file.pread(4, offset) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') - end + it "raises TypeError if maxlen is not an Integer and cannot be coerced into Integer" do + maxlen = Object.new + -> { @file.pread(maxlen, 0) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') + end - it "raises ArgumentError for negative values of maxlen" do - -> { @file.pread(-4, 0) }.should raise_error(ArgumentError, 'negative string size (or size too big)') - end + it "raises TypeError if offset is not an Integer and cannot be coerced into Integer" do + offset = Object.new + -> { @file.pread(4, offset) }.should raise_error(TypeError, 'no implicit conversion of Object into Integer') + end - it "raised Errno::EINVAL for negative values of offset" do - -> { @file.pread(4, -1) }.should raise_error(Errno::EINVAL, /Invalid argument/) - end + it "raises ArgumentError for negative values of maxlen" do + -> { @file.pread(-4, 0) }.should raise_error(ArgumentError, 'negative string size (or size too big)') + end - it "raises TypeError if the buffer is not a String and cannot be coerced into String" do - buffer = Object.new - -> { @file.pread(4, 0, buffer) }.should raise_error(TypeError, 'no implicit conversion of Object into String') - end + it "raised Errno::EINVAL for negative values of offset" do + -> { @file.pread(4, -1) }.should raise_error(Errno::EINVAL, /Invalid argument/) + end - it "raises EOFError if end-of-file is reached" do - -> { @file.pread(1, 10) }.should raise_error(EOFError) - end + it "raises TypeError if the buffer is not a String and cannot be coerced into String" do + buffer = Object.new + -> { @file.pread(4, 0, buffer) }.should raise_error(TypeError, 'no implicit conversion of Object into String') + end - it "raises IOError when file is not open in read mode" do - File.open(@fname, "w") do |file| - -> { file.pread(1, 1) }.should raise_error(IOError) - end - end + it "raises EOFError if end-of-file is reached" do 
+ -> { @file.pread(1, 10) }.should raise_error(EOFError) + end - it "raises IOError when file is closed" do - file = File.open(@fname, "r+") - file.close + it "raises IOError when file is not open in read mode" do + File.open(@fname, "w") do |file| -> { file.pread(1, 1) }.should raise_error(IOError) end end + + it "raises IOError when file is closed" do + file = File.open(@fname, "r+") + file.close + -> { file.pread(1, 1) }.should raise_error(IOError) + end end diff --git a/spec/ruby/core/io/pwrite_spec.rb b/spec/ruby/core/io/pwrite_spec.rb index 2bc508b37d1660..fd0b6cf380c463 100644 --- a/spec/ruby/core/io/pwrite_spec.rb +++ b/spec/ruby/core/io/pwrite_spec.rb @@ -1,69 +1,67 @@ # -*- encoding: utf-8 -*- require_relative '../../spec_helper' -guard -> { platform_is_not :windows or ruby_version_is "3.3" } do - describe "IO#pwrite" do - before :each do - @fname = tmp("io_pwrite.txt") - @file = File.open(@fname, "w+") - end +describe "IO#pwrite" do + before :each do + @fname = tmp("io_pwrite.txt") + @file = File.open(@fname, "w+") + end - after :each do - @file.close - rm_r @fname - end + after :each do + @file.close + rm_r @fname + end - it "returns the number of bytes written" do - @file.pwrite("foo", 0).should == 3 - end + it "returns the number of bytes written" do + @file.pwrite("foo", 0).should == 3 + end - it "accepts a string and an offset" do - @file.pwrite("foo", 2) - @file.pread(3, 2).should == "foo" - end + it "accepts a string and an offset" do + @file.pwrite("foo", 2) + @file.pread(3, 2).should == "foo" + end - it "does not advance the pointer in the file" do - @file.pwrite("bar", 3) - @file.write("foo") - @file.pread(6, 0).should == "foobar" - end + it "does not advance the pointer in the file" do + @file.pwrite("bar", 3) + @file.write("foo") + @file.pread(6, 0).should == "foobar" + end - it "calls #to_s on the object to be written" do - object = mock("to_s") - object.should_receive(:to_s).and_return("foo") - @file.pwrite(object, 0) - @file.pread(3, 
0).should == "foo" - end + it "calls #to_s on the object to be written" do + object = mock("to_s") + object.should_receive(:to_s).and_return("foo") + @file.pwrite(object, 0) + @file.pread(3, 0).should == "foo" + end - it "calls #to_int on the offset" do - offset = mock("to_int") - offset.should_receive(:to_int).and_return(2) - @file.pwrite("foo", offset) - @file.pread(3, 2).should == "foo" - end + it "calls #to_int on the offset" do + offset = mock("to_int") + offset.should_receive(:to_int).and_return(2) + @file.pwrite("foo", offset) + @file.pread(3, 2).should == "foo" + end - it "raises IOError when file is not open in write mode" do - File.open(@fname, "r") do |file| - -> { file.pwrite("foo", 1) }.should raise_error(IOError, "not opened for writing") - end + it "raises IOError when file is not open in write mode" do + File.open(@fname, "r") do |file| + -> { file.pwrite("foo", 1) }.should raise_error(IOError, "not opened for writing") end + end - it "raises IOError when file is closed" do - file = File.open(@fname, "w+") - file.close - -> { file.pwrite("foo", 1) }.should raise_error(IOError, "closed stream") - end + it "raises IOError when file is closed" do + file = File.open(@fname, "w+") + file.close + -> { file.pwrite("foo", 1) }.should raise_error(IOError, "closed stream") + end - it "raises a NoMethodError if object does not respond to #to_s" do - -> { - @file.pwrite(BasicObject.new, 0) - }.should raise_error(NoMethodError, /undefined method [`']to_s'/) - end + it "raises a NoMethodError if object does not respond to #to_s" do + -> { + @file.pwrite(BasicObject.new, 0) + }.should raise_error(NoMethodError, /undefined method [`']to_s'/) + end - it "raises a TypeError if the offset cannot be converted to an Integer" do - -> { - @file.pwrite("foo", Object.new) - }.should raise_error(TypeError, "no implicit conversion of Object into Integer") - end + it "raises a TypeError if the offset cannot be converted to an Integer" do + -> { + @file.pwrite("foo", 
Object.new) + }.should raise_error(TypeError, "no implicit conversion of Object into Integer") end end diff --git a/spec/ruby/core/io/read_spec.rb b/spec/ruby/core/io/read_spec.rb index 988ec2ce30df25..dfb42e09db7681 100644 --- a/spec/ruby/core/io/read_spec.rb +++ b/spec/ruby/core/io/read_spec.rb @@ -65,15 +65,6 @@ end platform_is_not :windows do - ruby_version_is ""..."3.3" do - it "uses an :open_args option" do - string = IO.read(@fname, nil, 0, open_args: ["r", nil, {encoding: Encoding::US_ASCII}]) - string.encoding.should == Encoding::US_ASCII - - string = IO.read(@fname, nil, 0, open_args: ["r", nil, {}]) - string.encoding.should == Encoding::UTF_8 - end - end end it "disregards other options if :open_args is given" do @@ -135,18 +126,9 @@ -> { IO.read @fname, -1 }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.3' do - it "raises an Errno::EINVAL when not passed a valid offset" do - -> { IO.read @fname, 0, -1 }.should raise_error(Errno::EINVAL) - -> { IO.read @fname, -1, -1 }.should raise_error(Errno::EINVAL) - end - end - - ruby_version_is '3.3' do - it "raises an ArgumentError when not passed a valid offset" do - -> { IO.read @fname, 0, -1 }.should raise_error(ArgumentError) - -> { IO.read @fname, -1, -1 }.should raise_error(ArgumentError) - end + it "raises an ArgumentError when not passed a valid offset" do + -> { IO.read @fname, 0, -1 }.should raise_error(ArgumentError) + -> { IO.read @fname, -1, -1 }.should raise_error(ArgumentError) end it "uses the external encoding specified via the :external_encoding option" do @@ -232,14 +214,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation" do - cmd = "|echo ok" - -> { - IO.read(cmd) - }.should complain(/IO process creation with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation" do + cmd = "|echo ok" + -> { + IO.read(cmd) + }.should complain(/IO process creation with a leading '\|'/) end 
end end @@ -322,11 +302,9 @@ -> { @io.read(nil, 'frozen-string'.freeze) }.should raise_error(FrozenError) end - ruby_bug "", ""..."3.3" do - it "raise FrozenError if the output buffer is frozen (2)" do - @io.read - -> { @io.read(1, ''.freeze) }.should raise_error(FrozenError) - end + it "raise FrozenError if the output buffer is frozen (2)" do + @io.read + -> { @io.read(1, ''.freeze) }.should raise_error(FrozenError) end it "consumes zero bytes when reading zero bytes" do diff --git a/spec/ruby/core/io/readlines_spec.rb b/spec/ruby/core/io/readlines_spec.rb index b4770775d1e813..07d29ea5317f2d 100644 --- a/spec/ruby/core/io/readlines_spec.rb +++ b/spec/ruby/core/io/readlines_spec.rb @@ -207,14 +207,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with a pipe" do - cmd = "|echo ok" - -> { - IO.readlines(cmd) - }.should complain(/IO process creation with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + IO.readlines(cmd) + }.should complain(/IO process creation with a leading '\|'/) end end diff --git a/spec/ruby/core/io/select_spec.rb b/spec/ruby/core/io/select_spec.rb index 3893e7620f9e10..9fdb7e12c932db 100644 --- a/spec/ruby/core/io/select_spec.rb +++ b/spec/ruby/core/io/select_spec.rb @@ -149,16 +149,28 @@ end end -describe "IO.select when passed nil for timeout" do - it "sleeps forever and sets the thread status to 'sleep'" do - t = Thread.new do - IO.select(nil, nil, nil, nil) +describe "IO.select with infinite timeout" do + describe :io_select_infinite_timeout, shared: true do + it "sleeps forever and sets the thread status to 'sleep'" do + t = Thread.new do + IO.select(nil, nil, nil, @method) + end + + Thread.pass while t.status && t.status != "sleep" + t.join unless t.status + t.status.should == "sleep" + t.kill + t.join end + end - Thread.pass while t.status && t.status != 
"sleep" - t.join unless t.status - t.status.should == "sleep" - t.kill - t.join + describe "IO.select when passed nil for timeout" do + it_behaves_like :io_select_infinite_timeout, nil + end + + ruby_version_is "4.0" do + describe "IO.select when passed Float::INFINITY for timeout" do + it_behaves_like :io_select_infinite_timeout, Float::INFINITY + end end end diff --git a/spec/ruby/core/io/shared/readlines.rb b/spec/ruby/core/io/shared/readlines.rb index 6c1fa11a596800..77eb9cbd65cb8f 100644 --- a/spec/ruby/core/io/shared/readlines.rb +++ b/spec/ruby/core/io/shared/readlines.rb @@ -83,11 +83,9 @@ -> { IO.send(@method, @name, 2**128, &@object) }.should raise_error(RangeError) end - ruby_bug "#18767", ""..."3.3" do - describe "when passed limit" do - it "raises ArgumentError when passed 0 as a limit" do - -> { IO.send(@method, @name, 0, &@object) }.should raise_error(ArgumentError) - end + describe "when passed limit" do + it "raises ArgumentError when passed 0 as a limit" do + -> { IO.send(@method, @name, 0, &@object) }.should raise_error(ArgumentError) end end end diff --git a/spec/ruby/core/io/write_spec.rb b/spec/ruby/core/io/write_spec.rb index e58100f8467d9c..95e6371985bf7c 100644 --- a/spec/ruby/core/io/write_spec.rb +++ b/spec/ruby/core/io/write_spec.rb @@ -102,6 +102,13 @@ File.binread(@filename).should == "h\u0000\u0000\u0000i\u0000\u0000\u0000" end + it "ignores the 'bom|' prefix" do + File.open(@filename, "w", encoding: 'bom|utf-8') do |file| + file.write("hi") + end + File.binread(@filename).should == "hi" + end + it "raises a invalid byte sequence error if invalid bytes are being written" do # pack "\xFEhi" to avoid utf-8 conflict xFEhi = ([254].pack('C*') + 'hi').force_encoding('utf-8') @@ -220,7 +227,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do # https://bugs.ruby-lang.org/issues/19630 it "warns about deprecation given a path with a pipe" do -> { diff --git a/spec/ruby/core/kernel/Integer_spec.rb 
b/spec/ruby/core/kernel/Integer_spec.rb index 74dd3e0dd2ef27..c62b8b08013898 100644 --- a/spec/ruby/core/kernel/Integer_spec.rb +++ b/spec/ruby/core/kernel/Integer_spec.rb @@ -586,19 +586,10 @@ Integer("777", obj).should == 0777 end - # https://bugs.ruby-lang.org/issues/19349 - ruby_version_is ''...'3.3' do - it "ignores the base if it is not an integer and does not respond to #to_i" do - Integer("777", "8").should == 777 - end - end - - ruby_version_is '3.3' do - it "raises a TypeError if it is not an integer and does not respond to #to_i" do - -> { - Integer("777", "8") - }.should raise_error(TypeError, "no implicit conversion of String into Integer") - end + it "raises a TypeError if it is not an integer and does not respond to #to_i" do + -> { + Integer("777", "8") + }.should raise_error(TypeError, "no implicit conversion of String into Integer") end describe "when passed exception: false" do diff --git a/spec/ruby/core/kernel/caller_spec.rb b/spec/ruby/core/kernel/caller_spec.rb index 7cd703de5a3a8d..df051ef07f2d16 100644 --- a/spec/ruby/core/kernel/caller_spec.rb +++ b/spec/ruby/core/kernel/caller_spec.rb @@ -84,14 +84,25 @@ end guard -> { Kernel.instance_method(:tap).source_location } do - ruby_version_is ""..."4.0" do + ruby_version_is ""..."3.4" do it "includes core library methods defined in Ruby" do file, line = Kernel.instance_method(:tap).source_location file.should.start_with?(' { lambda(&proc{}) }.should complain("#{__FILE__}:#{__LINE__}: warning: lambda without a literal block is deprecated; use the proc without lambda instead\n") - end - end - - ruby_version_is "3.3" do - it "raises when proc isn't a lambda" do - -> { lambda(&proc{}) }.should raise_error(ArgumentError, /the lambda method requires a literal block/) - end + it "raises when proc isn't a lambda" do + -> { lambda(&proc{}) }.should raise_error(ArgumentError, /the lambda method requires a literal block/) end it "doesn't warn when proc is lambda" do diff --git 
a/spec/ruby/core/kernel/open_spec.rb b/spec/ruby/core/kernel/open_spec.rb index b967d5044ba92b..9d3f3760b96b3e 100644 --- a/spec/ruby/core/kernel/open_spec.rb +++ b/spec/ruby/core/kernel/open_spec.rb @@ -79,14 +79,12 @@ end end - ruby_version_is "3.3" do - # https://bugs.ruby-lang.org/issues/19630 - it "warns about deprecation given a path with a pipe" do - cmd = "|echo ok" - -> { - open(cmd) { |f| f.read } - }.should complain(/Kernel#open with a leading '\|'/) - end + # https://bugs.ruby-lang.org/issues/19630 + it "warns about deprecation given a path with a pipe" do + cmd = "|echo ok" + -> { + open(cmd) { |f| f.read } + }.should complain(/Kernel#open with a leading '\|'/) end end diff --git a/spec/ruby/core/kernel/shared/require.rb b/spec/ruby/core/kernel/shared/require.rb index 52f86f73e50f48..ef5b9486c6157d 100644 --- a/spec/ruby/core/kernel/shared/require.rb +++ b/spec/ruby/core/kernel/shared/require.rb @@ -266,15 +266,13 @@ ScratchPad.recorded.should == [:loaded] end - ruby_bug "#17340", ''...'3.3' do - it "loads a file concurrently" do - path = File.expand_path "concurrent_require_fixture.rb", CODE_LOADING_DIR - ScratchPad.record(@object) - -> { - @object.require(path) - }.should_not complain(/circular require considered harmful/, verbose: true) - ScratchPad.recorded.join - end + it "loads a file concurrently" do + path = File.expand_path "concurrent_require_fixture.rb", CODE_LOADING_DIR + ScratchPad.record(@object) + -> { + @object.require(path) + }.should_not complain(/circular require considered harmful/, verbose: true) + ScratchPad.recorded.join end end diff --git a/spec/ruby/core/kernel/sleep_spec.rb b/spec/ruby/core/kernel/sleep_spec.rb index e9c600aac41107..0b003ad189a48b 100644 --- a/spec/ruby/core/kernel/sleep_spec.rb +++ b/spec/ruby/core/kernel/sleep_spec.rb @@ -63,27 +63,19 @@ def o.divmod(*); [0, 0.001]; end actual_duration.should > 0.01 # 100 * 0.0001 => 0.01 end - ruby_version_is ""..."3.3" do - it "raises a TypeError when passed nil" do - -> { 
sleep(nil) }.should raise_error(TypeError) + it "accepts a nil duration" do + running = false + t = Thread.new do + running = true + sleep(nil) + 5 end - end - - ruby_version_is "3.3" do - it "accepts a nil duration" do - running = false - t = Thread.new do - running = true - sleep(nil) - 5 - end - Thread.pass until running - Thread.pass while t.status and t.status != "sleep" + Thread.pass until running + Thread.pass while t.status and t.status != "sleep" - t.wakeup - t.value.should == 5 - end + t.wakeup + t.value.should == 5 end context "Kernel.sleep with Fiber scheduler" do diff --git a/spec/ruby/core/marshal/shared/load.rb b/spec/ruby/core/marshal/shared/load.rb index 204a4d34e3edff..692c14cfa10adb 100644 --- a/spec/ruby/core/marshal/shared/load.rb +++ b/spec/ruby/core/marshal/shared/load.rb @@ -127,36 +127,32 @@ Object.should_not.frozen? end - ruby_bug "#19427", ""..."3.3" do - it "does freeze extended objects" do - object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", freeze: true) - object.should.frozen? - end + it "does freeze extended objects" do + object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", freeze: true) + object.should.frozen? + end - it "does freeze extended objects with instance variables" do - object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x06:\n@ivarT", freeze: true) - object.should.frozen? - end + it "does freeze extended objects with instance variables" do + object = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x06:\n@ivarT", freeze: true) + object.should.frozen? end - ruby_bug "#19427", ""..."3.3" do - it "returns frozen object having #_dump method" do - object = Marshal.send(@method, Marshal.dump(UserDefined.new), freeze: true) - object.should.frozen? - end + it "returns frozen object having #_dump method" do + object = Marshal.send(@method, Marshal.dump(UserDefined.new), freeze: true) + object.should.frozen? 
+ end - it "returns frozen object responding to #marshal_dump and #marshal_load" do - object = Marshal.send(@method, Marshal.dump(UserMarshal.new), freeze: true) - object.should.frozen? - end + it "returns frozen object responding to #marshal_dump and #marshal_load" do + object = Marshal.send(@method, Marshal.dump(UserMarshal.new), freeze: true) + object.should.frozen? + end - it "returns frozen object extended by a module" do - object = Object.new - object.extend(MarshalSpec::ModuleToExtendBy) + it "returns frozen object extended by a module" do + object = Object.new + object.extend(MarshalSpec::ModuleToExtendBy) - object = Marshal.send(@method, Marshal.dump(object), freeze: true) - object.should.frozen? - end + object = Marshal.send(@method, Marshal.dump(object), freeze: true) + object.should.frozen? end it "does not call freeze method" do @@ -239,12 +235,10 @@ string.should.frozen? end - ruby_bug "#19427", ""..."3.3" do - it "call the proc with extended objects" do - objs = [] - obj = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", Proc.new { |o| objs << o; o }) - objs.should == [obj] - end + it "call the proc with extended objects" do + objs = [] + obj = Marshal.load("\x04\be:\x0FEnumerableo:\vObject\x00", Proc.new { |o| objs << o; o }) + objs.should == [obj] end it "returns the value of the proc" do @@ -930,15 +924,13 @@ def io.binmode; raise "binmode"; end [Meths, UserRegexp, Regexp] end - ruby_bug "#19439", ""..."3.3" do - it "restore the regexp instance variables" do - obj = Regexp.new("hello") - obj.instance_variable_set(:@regexp_ivar, [42]) + it "restore the regexp instance variables" do + obj = Regexp.new("hello") + obj.instance_variable_set(:@regexp_ivar, [42]) - new_obj = Marshal.send(@method, "\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/") - new_obj.instance_variables.should == [:@regexp_ivar] - new_obj.instance_variable_get(:@regexp_ivar).should == [42] - end + new_obj = Marshal.send(@method, 
"\x04\bI/\nhello\x00\a:\x06EF:\x11@regexp_ivar[\x06i/") + new_obj.instance_variables.should == [:@regexp_ivar] + new_obj.instance_variable_get(:@regexp_ivar).should == [42] end it "preserves Regexp encoding" do diff --git a/spec/ruby/core/matchdata/named_captures_spec.rb b/spec/ruby/core/matchdata/named_captures_spec.rb index 5e4693d62d662b..10b1f884d6e4c9 100644 --- a/spec/ruby/core/matchdata/named_captures_spec.rb +++ b/spec/ruby/core/matchdata/named_captures_spec.rb @@ -13,15 +13,13 @@ /\A(?.)(?.)(?.)(?.)?\z/.match('012').named_captures.should == { 'a' => '0', 'b' => '2' } end - ruby_version_is "3.3" do - it 'returns a Hash with Symbol keys when symbolize_names is provided a true value' do - /(?.)(?.)?/.match('0').named_captures(symbolize_names: true).should == { a: '0', b: nil } - /(?.)(?.)?/.match('0').named_captures(symbolize_names: "truly").should == { a: '0', b: nil } - end + it 'returns a Hash with Symbol keys when symbolize_names is provided a true value' do + /(?.)(?.)?/.match('0').named_captures(symbolize_names: true).should == { a: '0', b: nil } + /(?.)(?.)?/.match('0').named_captures(symbolize_names: "truly").should == { a: '0', b: nil } + end - it 'returns a Hash with String keys when symbolize_names is provided a false value' do - /(?.)(?.)?/.match('02').named_captures(symbolize_names: false).should == { 'a' => '0', 'b' => '2' } - /(?.)(?.)?/.match('02').named_captures(symbolize_names: nil).should == { 'a' => '0', 'b' => '2' } - end + it 'returns a Hash with String keys when symbolize_names is provided a false value' do + /(?.)(?.)?/.match('02').named_captures(symbolize_names: false).should == { 'a' => '0', 'b' => '2' } + /(?.)(?.)?/.match('02').named_captures(symbolize_names: nil).should == { 'a' => '0', 'b' => '2' } end end diff --git a/spec/ruby/core/math/log10_spec.rb b/spec/ruby/core/math/log10_spec.rb index c4daedcd5c9d75..f3bd7fd4b86474 100644 --- a/spec/ruby/core/math/log10_spec.rb +++ b/spec/ruby/core/math/log10_spec.rb @@ -23,6 +23,10 @@ 
-> { Math.log10("test") }.should raise_error(TypeError) end + it "raises a TypeError if passed a numerical argument as a string" do + -> { Math.log10("1.0") }.should raise_error(TypeError) + end + it "returns NaN given NaN" do Math.log10(nan_value).nan?.should be_true end diff --git a/spec/ruby/core/module/set_temporary_name_spec.rb b/spec/ruby/core/module/set_temporary_name_spec.rb index 46605ed6758877..0b96b869c90c51 100644 --- a/spec/ruby/core/module/set_temporary_name_spec.rb +++ b/spec/ruby/core/module/set_temporary_name_spec.rb @@ -1,147 +1,145 @@ require_relative '../../spec_helper' require_relative 'fixtures/set_temporary_name' -ruby_version_is "3.3" do - describe "Module#set_temporary_name" do - it "can assign a temporary name" do - m = Module.new - m.name.should be_nil +describe "Module#set_temporary_name" do + it "can assign a temporary name" do + m = Module.new + m.name.should be_nil - m.set_temporary_name("fake_name") - m.name.should == "fake_name" + m.set_temporary_name("fake_name") + m.name.should == "fake_name" - m.set_temporary_name(nil) - m.name.should be_nil - end + m.set_temporary_name(nil) + m.name.should be_nil + end - it "returns self" do - m = Module.new - m.set_temporary_name("fake_name").should.equal? m - end + it "returns self" do + m = Module.new + m.set_temporary_name("fake_name").should.equal? 
m + end - it "can assign a temporary name which is not a valid constant path" do - m = Module.new + it "can assign a temporary name which is not a valid constant path" do + m = Module.new - m.set_temporary_name("name") - m.name.should == "name" + m.set_temporary_name("name") + m.name.should == "name" - m.set_temporary_name("Template['foo.rb']") - m.name.should == "Template['foo.rb']" + m.set_temporary_name("Template['foo.rb']") + m.name.should == "Template['foo.rb']" - m.set_temporary_name("a::B") - m.name.should == "a::B" + m.set_temporary_name("a::B") + m.name.should == "a::B" - m.set_temporary_name("A::b") - m.name.should == "A::b" + m.set_temporary_name("A::b") + m.name.should == "A::b" - m.set_temporary_name("A::B::") - m.name.should == "A::B::" + m.set_temporary_name("A::B::") + m.name.should == "A::B::" - m.set_temporary_name("A::::B") - m.name.should == "A::::B" + m.set_temporary_name("A::::B") + m.name.should == "A::::B" - m.set_temporary_name("A=") - m.name.should == "A=" - end + m.set_temporary_name("A=") + m.name.should == "A=" + end - it "can't assign empty string as name" do - m = Module.new - -> { m.set_temporary_name("") }.should raise_error(ArgumentError, "empty class/module name") - end + it "can't assign empty string as name" do + m = Module.new + -> { m.set_temporary_name("") }.should raise_error(ArgumentError, "empty class/module name") + end - it "can't assign a constant name as a temporary name" do - m = Module.new - -> { m.set_temporary_name("Object") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - end + it "can't assign a constant name as a temporary name" do + m = Module.new + -> { m.set_temporary_name("Object") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + end - it "can't assign a constant path as a temporary name" do - m = Module.new - -> { m.set_temporary_name("A::B") }.should raise_error(ArgumentError, "the temporary 
name must not be a constant path to avoid confusion") - -> { m.set_temporary_name("::A") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - -> { m.set_temporary_name("::A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") - end + it "can't assign a constant path as a temporary name" do + m = Module.new + -> { m.set_temporary_name("A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + -> { m.set_temporary_name("::A") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + -> { m.set_temporary_name("::A::B") }.should raise_error(ArgumentError, "the temporary name must not be a constant path to avoid confusion") + end - it "can't assign name to permanent module" do - -> { Object.set_temporary_name("fake_name") }.should raise_error(RuntimeError, "can't change permanent name") - end + it "can't assign name to permanent module" do + -> { Object.set_temporary_name("fake_name") }.should raise_error(RuntimeError, "can't change permanent name") + end - it "can assign a temporary name to a module nested into an anonymous module" do - m = Module.new - module m::N; end - m::N.name.should =~ /\A#::N\z/ + it "can assign a temporary name to a module nested into an anonymous module" do + m = Module.new + module m::N; end + m::N.name.should =~ /\A#::N\z/ - m::N.set_temporary_name("fake_name") - m::N.name.should == "fake_name" + m::N.set_temporary_name("fake_name") + m::N.name.should == "fake_name" - m::N.set_temporary_name(nil) - m::N.name.should be_nil - end + m::N.set_temporary_name(nil) + m::N.name.should be_nil + end - it "discards a temporary name when an outer anonymous module gets a permanent name" do - m = Module.new - module m::N; end + it "discards a temporary name when an outer anonymous module gets a permanent name" do + m = Module.new + module 
m::N; end - m::N.set_temporary_name("fake_name") - m::N.name.should == "fake_name" + m::N.set_temporary_name("fake_name") + m::N.name.should == "fake_name" - ModuleSpecs::SetTemporaryNameSpec::M = m - m::N.name.should == "ModuleSpecs::SetTemporaryNameSpec::M::N" - ModuleSpecs::SetTemporaryNameSpec.send :remove_const, :M - end + ModuleSpecs::SetTemporaryNameSpec::M = m + m::N.name.should == "ModuleSpecs::SetTemporaryNameSpec::M::N" + ModuleSpecs::SetTemporaryNameSpec.send :remove_const, :M + end - it "can update the name when assigned to a constant" do - m = Module.new - m::N = Module.new - m::N.name.should =~ /\A#::N\z/ - m::N.set_temporary_name(nil) + it "can update the name when assigned to a constant" do + m = Module.new + m::N = Module.new + m::N.name.should =~ /\A#::N\z/ + m::N.set_temporary_name(nil) - m::M = m::N - m::M.name.should =~ /\A#::M\z/m - end + m::M = m::N + m::M.name.should =~ /\A#::M\z/m + end - it "can reassign a temporary name repeatedly" do - m = Module.new + it "can reassign a temporary name repeatedly" do + m = Module.new - m.set_temporary_name("fake_name") - m.name.should == "fake_name" + m.set_temporary_name("fake_name") + m.name.should == "fake_name" - m.set_temporary_name("fake_name_2") - m.name.should == "fake_name_2" - end + m.set_temporary_name("fake_name_2") + m.name.should == "fake_name_2" + end - ruby_bug "#21094", ""..."4.0" do - it "also updates a name of a nested module" do - m = Module.new - m::N = Module.new - m::N.name.should =~ /\A#::N\z/ + ruby_bug "#21094", ""..."4.0" do + it "also updates a name of a nested module" do + m = Module.new + m::N = Module.new + m::N.name.should =~ /\A#::N\z/ - m.set_temporary_name "m" - m::N.name.should == "m::N" + m.set_temporary_name "m" + m::N.name.should == "m::N" - m.set_temporary_name nil - m::N.name.should == nil - end + m.set_temporary_name nil + m::N.name.should == nil end + end - it "keeps temporary name when assigned in an anonymous module" do - outer = Module.new - m = Module.new - 
m.set_temporary_name "m" - m.name.should == "m" - outer::M = m - m.name.should == "m" - m.inspect.should == "m" - end + it "keeps temporary name when assigned in an anonymous module" do + outer = Module.new + m = Module.new + m.set_temporary_name "m" + m.name.should == "m" + outer::M = m + m.name.should == "m" + m.inspect.should == "m" + end - it "keeps temporary name when assigned in an anonymous module and nested before" do - outer = Module.new - m = Module.new - outer::A = m - m.set_temporary_name "m" - m.name.should == "m" - outer::M = m - m.name.should == "m" - m.inspect.should == "m" - end + it "keeps temporary name when assigned in an anonymous module and nested before" do + outer = Module.new + m = Module.new + outer::A = m + m.set_temporary_name "m" + m.name.should == "m" + outer::M = m + m.name.should == "m" + m.inspect.should == "m" end end diff --git a/spec/ruby/core/module/shared/class_eval.rb b/spec/ruby/core/module/shared/class_eval.rb index b1d5cb3814edea..526d0a20363dc8 100644 --- a/spec/ruby/core/module/shared/class_eval.rb +++ b/spec/ruby/core/module/shared/class_eval.rb @@ -52,10 +52,8 @@ def foo ModuleSpecs.send(@method, "[__FILE__, __LINE__]", "test", 102).should == ["test", 102] end - ruby_version_is "3.3" do - it "uses the caller location as default filename" do - ModuleSpecs.send(@method, "[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] - end + it "uses the caller location as default filename" do + ModuleSpecs.send(@method, "[__FILE__, __LINE__]").should == ["(eval at #{__FILE__}:#{__LINE__})", 1] end it "converts a non-string filename to a string using to_str" do diff --git a/spec/ruby/core/nil/singleton_method_spec.rb b/spec/ruby/core/nil/singleton_method_spec.rb index 8d898b1cc94d70..fb47af0c3e8c5d 100644 --- a/spec/ruby/core/nil/singleton_method_spec.rb +++ b/spec/ruby/core/nil/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "NilClass#singleton_method" do - 
ruby_version_is '3.3' do - it "raises regardless of whether NilClass defines the method" do + it "raises regardless of whether NilClass defines the method" do + -> { nil.singleton_method(:foo) }.should raise_error(NameError) + begin + def (nil).foo; end -> { nil.singleton_method(:foo) }.should raise_error(NameError) - begin - def (nil).foo; end - -> { nil.singleton_method(:foo) }.should raise_error(NameError) - ensure - NilClass.send(:remove_method, :foo) - end + ensure + NilClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/numeric/remainder_spec.rb b/spec/ruby/core/numeric/remainder_spec.rb index 674fa22d8ef997..29654310d231e2 100644 --- a/spec/ruby/core/numeric/remainder_spec.rb +++ b/spec/ruby/core/numeric/remainder_spec.rb @@ -6,9 +6,7 @@ @obj = NumericSpecs::Subclass.new @result = mock("Numeric#% result") @other = mock("Passed Object") - ruby_version_is "3.3" do - @other.should_receive(:coerce).with(@obj).and_return([@obj, @other]) - end + @other.should_receive(:coerce).with(@obj).and_return([@obj, @other]) end it "returns the result of calling self#% with other if self is 0" do diff --git a/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb b/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb index 8050e2c30729d6..b1804ec9b003b7 100644 --- a/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/clear_spec.rb @@ -1,27 +1,25 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakKeyMap#clear" do - it "removes all the entries" do - m = ObjectSpace::WeakKeyMap.new +describe "ObjectSpace::WeakKeyMap#clear" do + it "removes all the entries" do + m = ObjectSpace::WeakKeyMap.new - key = Object.new - value = Object.new - m[key] = value + key = Object.new + value = Object.new + m[key] = value - key2 = Object.new - value2 = Object.new - m[key2] = value2 + key2 = Object.new + value2 = Object.new + m[key2] = value2 - m.clear + m.clear - m.key?(key).should == false 
- m.key?(key2).should == false - end + m.key?(key).should == false + m.key?(key2).should == false + end - it "returns self" do - m = ObjectSpace::WeakKeyMap.new - m.clear.should.equal?(m) - end + it "returns self" do + m = ObjectSpace::WeakKeyMap.new + m.clear.should.equal?(m) end end diff --git a/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb b/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb index 3cd61355d64f9f..ad32c2c75efda4 100644 --- a/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/delete_spec.rb @@ -1,51 +1,49 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakKeyMap#delete" do - it "removes the entry and returns the deleted value" do - m = ObjectSpace::WeakKeyMap.new - key = Object.new - value = Object.new - m[key] = value - - m.delete(key).should == value - m.key?(key).should == false - end +describe "ObjectSpace::WeakKeyMap#delete" do + it "removes the entry and returns the deleted value" do + m = ObjectSpace::WeakKeyMap.new + key = Object.new + value = Object.new + m[key] = value + + m.delete(key).should == value + m.key?(key).should == false + end - it "uses equality semantic" do - m = ObjectSpace::WeakKeyMap.new - key = "foo".upcase - value = Object.new - m[key] = value + it "uses equality semantic" do + m = ObjectSpace::WeakKeyMap.new + key = "foo".upcase + value = Object.new + m[key] = value - m.delete("foo".upcase).should == value - m.key?(key).should == false - end + m.delete("foo".upcase).should == value + m.key?(key).should == false + end - it "calls supplied block if the key is not found" do - key = Object.new - m = ObjectSpace::WeakKeyMap.new - return_value = m.delete(key) do |yielded_key| - yielded_key.should == key - 5 - end - return_value.should == 5 + it "calls supplied block if the key is not found" do + key = Object.new + m = ObjectSpace::WeakKeyMap.new + return_value = m.delete(key) do |yielded_key| + yielded_key.should == key + 5 
end + return_value.should == 5 + end - it "returns nil if the key is not found when no block is given" do - m = ObjectSpace::WeakKeyMap.new - m.delete(Object.new).should == nil - end + it "returns nil if the key is not found when no block is given" do + m = ObjectSpace::WeakKeyMap.new + m.delete(Object.new).should == nil + end - it "returns nil when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns nil when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.delete(1).should == nil - map.delete(1.0).should == nil - map.delete(:a).should == nil - map.delete(true).should == nil - map.delete(false).should == nil - map.delete(nil).should == nil - end + map.delete(1).should == nil + map.delete(1.0).should == nil + map.delete(:a).should == nil + map.delete(true).should == nil + map.delete(false).should == nil + map.delete(nil).should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb b/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb index 51368e8d3ba3af..53eff79c40fb1c 100644 --- a/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/element_reference_spec.rb @@ -1,107 +1,105 @@ require_relative '../../../spec_helper' require_relative 'fixtures/classes' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#[]" do - it "is faithful to the map's content" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] - map[key1] = ref1 - map[key1].should == ref1 - map[key1] = ref1 - map[key1].should == ref1 - map[key2] = ref2 - map[key1].should == ref1 - map[key2].should == ref2 - end - - it "compares keys with #eql? 
semantics" do - map = ObjectSpace::WeakKeyMap.new - key = [1.0] - map[key] = "x" - map[[1]].should == nil - map[[1.0]].should == "x" - key.should == [1.0] # keep the key alive until here to keep the map entry - - map = ObjectSpace::WeakKeyMap.new - key = [1] - map[key] = "x" - map[[1.0]].should == nil - map[[1]].should == "x" - key.should == [1] # keep the key alive until here to keep the map entry - - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) - ref = "x" - map[key1] = ref - map[key2].should == ref - end - - it "compares key via #hash first" do - x = mock('0') - x.should_receive(:hash).and_return(0) - - map = ObjectSpace::WeakKeyMap.new - key = 'foo' - map[key] = :bar - map[x].should == nil - end - - it "does not compare keys with different #hash values via #eql?" do - x = mock('x') - x.should_not_receive(:eql?) - x.stub!(:hash).and_return(0) - - y = mock('y') - y.should_not_receive(:eql?) - y.stub!(:hash).and_return(1) - - map = ObjectSpace::WeakKeyMap.new - map[y] = 1 - map[x].should == nil - end - - it "compares keys with the same #hash value via #eql?" do - x = mock('x') - x.should_receive(:eql?).and_return(true) - x.stub!(:hash).and_return(42) - - y = mock('y') - y.should_not_receive(:eql?) - y.stub!(:hash).and_return(42) - - map = ObjectSpace::WeakKeyMap.new - map[y] = 1 - map[x].should == 1 - end - - it "finds a value via an identical key even when its #eql? isn't reflexive" do - x = mock('x') - x.should_receive(:hash).at_least(1).and_return(42) - x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. 
- - map = ObjectSpace::WeakKeyMap.new - map[x] = :x - map[x].should == :x - end - - it "supports keys with private #hash method" do - key = WeakKeyMapSpecs::KeyWithPrivateHash.new - map = ObjectSpace::WeakKeyMap.new - map[key] = 42 - map[key].should == 42 - end - - it "returns nil and does not raise error when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new - - map[1].should == nil - map[1.0].should == nil - map[:a].should == nil - map[true].should == nil - map[false].should == nil - map[nil].should == nil - end +describe "ObjectSpace::WeakKeyMap#[]" do + it "is faithful to the map's content" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] + map[key1] = ref1 + map[key1].should == ref1 + map[key1] = ref1 + map[key1].should == ref1 + map[key2] = ref2 + map[key1].should == ref1 + map[key2].should == ref2 + end + + it "compares keys with #eql? semantics" do + map = ObjectSpace::WeakKeyMap.new + key = [1.0] + map[key] = "x" + map[[1]].should == nil + map[[1.0]].should == "x" + key.should == [1.0] # keep the key alive until here to keep the map entry + + map = ObjectSpace::WeakKeyMap.new + key = [1] + map[key] = "x" + map[[1.0]].should == nil + map[[1]].should == "x" + key.should == [1] # keep the key alive until here to keep the map entry + + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) + ref = "x" + map[key1] = ref + map[key2].should == ref + end + + it "compares key via #hash first" do + x = mock('0') + x.should_receive(:hash).and_return(0) + + map = ObjectSpace::WeakKeyMap.new + key = 'foo' + map[key] = :bar + map[x].should == nil + end + + it "does not compare keys with different #hash values via #eql?" do + x = mock('x') + x.should_not_receive(:eql?) + x.stub!(:hash).and_return(0) + + y = mock('y') + y.should_not_receive(:eql?) 
+ y.stub!(:hash).and_return(1) + + map = ObjectSpace::WeakKeyMap.new + map[y] = 1 + map[x].should == nil + end + + it "compares keys with the same #hash value via #eql?" do + x = mock('x') + x.should_receive(:eql?).and_return(true) + x.stub!(:hash).and_return(42) + + y = mock('y') + y.should_not_receive(:eql?) + y.stub!(:hash).and_return(42) + + map = ObjectSpace::WeakKeyMap.new + map[y] = 1 + map[x].should == 1 + end + + it "finds a value via an identical key even when its #eql? isn't reflexive" do + x = mock('x') + x.should_receive(:hash).at_least(1).and_return(42) + x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI. + + map = ObjectSpace::WeakKeyMap.new + map[x] = :x + map[x].should == :x + end + + it "supports keys with private #hash method" do + key = WeakKeyMapSpecs::KeyWithPrivateHash.new + map = ObjectSpace::WeakKeyMap.new + map[key] = 42 + map[key].should == 42 + end + + it "returns nil and does not raise error when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new + + map[1].should == nil + map[1.0].should == nil + map[:a].should == nil + map[true].should == nil + map[false].should == nil + map[nil].should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb b/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb index 8db8d780c71a86..c480aa661ae2fd 100644 --- a/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/element_set_spec.rb @@ -1,82 +1,80 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#[]=" do - def should_accept(map, key, value) - (map[key] = value).should == value - map.should.key?(key) - map[key].should == value - end +describe "ObjectSpace::WeakKeyMap#[]=" do + def should_accept(map, key, value) + (map[key] = value).should == value + map.should.key?(key) + map[key].should == value + end + + it "is correct" do + map = 
ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] + should_accept(map, key1, ref1) + should_accept(map, key1, ref1) + should_accept(map, key2, ref2) + map[key1].should == ref1 + end + + it "requires the keys to implement #hash" do + map = ObjectSpace::WeakKeyMap.new + -> { map[BasicObject.new] = 1 }.should raise_error(NoMethodError, /undefined method [`']hash' for an instance of BasicObject/) + end - it "is correct" do + it "accepts frozen keys or values" do + map = ObjectSpace::WeakKeyMap.new + x = Object.new + should_accept(map, x, true) + should_accept(map, x, false) + should_accept(map, x, 42) + should_accept(map, x, :foo) + + y = Object.new.freeze + should_accept(map, x, y) + should_accept(map, y, x) + end + + it "does not duplicate and freeze String keys (like Hash#[]= does)" do + map = ObjectSpace::WeakKeyMap.new + key = +"a" + map[key] = 1 + + map.getkey("a").should.equal? key + map.getkey("a").should_not.frozen? + + key.should == "a" # keep the key alive until here to keep the map entry + end + + context "a key cannot be garbage collected" do + it "raises ArgumentError when Integer is used as a key" do map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] - should_accept(map, key1, ref1) - should_accept(map, key1, ref1) - should_accept(map, key2, ref2) - map[key1].should == ref1 + -> { map[1] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "requires the keys to implement #hash" do + it "raises ArgumentError when Float is used as a key" do map = ObjectSpace::WeakKeyMap.new - -> { map[BasicObject.new] = 1 }.should raise_error(NoMethodError, /undefined method [`']hash' for an instance of BasicObject/) + -> { map[1.0] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "accepts frozen keys or values" do + it "raises ArgumentError when Symbol is used as a key" do map = 
ObjectSpace::WeakKeyMap.new - x = Object.new - should_accept(map, x, true) - should_accept(map, x, false) - should_accept(map, x, 42) - should_accept(map, x, :foo) - - y = Object.new.freeze - should_accept(map, x, y) - should_accept(map, y, x) + -> { map[:a] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - it "does not duplicate and freeze String keys (like Hash#[]= does)" do + it "raises ArgumentError when true is used as a key" do map = ObjectSpace::WeakKeyMap.new - key = +"a" - map[key] = 1 - - map.getkey("a").should.equal? key - map.getkey("a").should_not.frozen? - - key.should == "a" # keep the key alive until here to keep the map entry + -> { map[true] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end - context "a key cannot be garbage collected" do - it "raises ArgumentError when Integer is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[1] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when Float is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[1.0] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when Symbol is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[:a] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when true is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[true] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end - - it "raises ArgumentError when false is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[false] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end + it "raises ArgumentError when false is used as a key" do + map = 
ObjectSpace::WeakKeyMap.new + -> { map[false] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) + end - it "raises ArgumentError when nil is used as a key" do - map = ObjectSpace::WeakKeyMap.new - -> { map[nil] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) - end + it "raises ArgumentError when nil is used as a key" do + map = ObjectSpace::WeakKeyMap.new + -> { map[nil] = "x" }.should raise_error(ArgumentError, /WeakKeyMap (keys )?must be garbage collectable/) end end end diff --git a/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb b/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb index 8a2dbf809d8c23..0c8dec8aea5248 100644 --- a/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/getkey_spec.rb @@ -1,28 +1,26 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#getkey" do - it "returns the existing equal key" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) +describe "ObjectSpace::WeakKeyMap#getkey" do + it "returns the existing equal key" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) - map[key1] = true - map.getkey(key2).should equal(key1) - map.getkey("X").should == nil + map[key1] = true + map.getkey(key2).should equal(key1) + map.getkey("X").should == nil - key1.should == "A" # keep the key alive until here to keep the map entry - key2.should == "A" # keep the key alive until here to keep the map entry - end + key1.should == "A" # keep the key alive until here to keep the map entry + key2.should == "A" # keep the key alive until here to keep the map entry + end - it "returns nil when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns nil when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.getkey(1).should == nil - map.getkey(1.0).should == nil - 
map.getkey(:a).should == nil - map.getkey(true).should == nil - map.getkey(false).should == nil - map.getkey(nil).should == nil - end + map.getkey(1).should == nil + map.getkey(1.0).should == nil + map.getkey(:a).should == nil + map.getkey(true).should == nil + map.getkey(false).should == nil + map.getkey(nil).should == nil end end diff --git a/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb b/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb index 319f050970e31a..b6bb4691584293 100644 --- a/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/inspect_spec.rb @@ -1,21 +1,19 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#inspect" do - it "only displays size in output" do - map = ObjectSpace::WeakKeyMap.new - key1, key2, key3 = "foo", "bar", "bar" - map.inspect.should =~ /\A\#\z/ - map[key1] = 1 - map.inspect.should =~ /\A\#\z/ - map[key2] = 2 - map.inspect.should =~ /\A\#\z/ - map[key3] = 3 - map.inspect.should =~ /\A\#\z/ +describe "ObjectSpace::WeakKeyMap#inspect" do + it "only displays size in output" do + map = ObjectSpace::WeakKeyMap.new + key1, key2, key3 = "foo", "bar", "bar" + map.inspect.should =~ /\A\#\z/ + map[key1] = 1 + map.inspect.should =~ /\A\#\z/ + map[key2] = 2 + map.inspect.should =~ /\A\#\z/ + map[key3] = 3 + map.inspect.should =~ /\A\#\z/ - key1.should == "foo" # keep the key alive until here to keep the map entry - key2.should == "bar" # keep the key alive until here to keep the map entry - key3.should == "bar" # keep the key alive until here to keep the map entry - end + key1.should == "foo" # keep the key alive until here to keep the map entry + key2.should == "bar" # keep the key alive until here to keep the map entry + key3.should == "bar" # keep the key alive until here to keep the map entry end end diff --git a/spec/ruby/core/objectspace/weakkeymap/key_spec.rb b/spec/ruby/core/objectspace/weakkeymap/key_spec.rb index 
a9a2e12432c845..e0b686667197cc 100644 --- a/spec/ruby/core/objectspace/weakkeymap/key_spec.rb +++ b/spec/ruby/core/objectspace/weakkeymap/key_spec.rb @@ -1,44 +1,42 @@ require_relative '../../../spec_helper' -ruby_version_is "3.3" do - describe "ObjectSpace::WeakKeyMap#key?" do - it "recognizes keys in use" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a b].map(&:upcase) - ref1, ref2 = %w[x y] +describe "ObjectSpace::WeakKeyMap#key?" do + it "recognizes keys in use" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a b].map(&:upcase) + ref1, ref2 = %w[x y] - map[key1] = ref1 - map.key?(key1).should == true - map[key1] = ref1 - map.key?(key1).should == true - map[key2] = ref2 - map.key?(key2).should == true - end + map[key1] = ref1 + map.key?(key1).should == true + map[key1] = ref1 + map.key?(key1).should == true + map[key2] = ref2 + map.key?(key2).should == true + end - it "matches using equality semantics" do - map = ObjectSpace::WeakKeyMap.new - key1, key2 = %w[a a].map(&:upcase) - ref = "x" - map[key1] = ref - map.key?(key2).should == true - end + it "matches using equality semantics" do + map = ObjectSpace::WeakKeyMap.new + key1, key2 = %w[a a].map(&:upcase) + ref = "x" + map[key1] = ref + map.key?(key2).should == true + end - it "reports true if the pair exists and the value is nil" do - map = ObjectSpace::WeakKeyMap.new - key = Object.new - map[key] = nil - map.key?(key).should == true - end + it "reports true if the pair exists and the value is nil" do + map = ObjectSpace::WeakKeyMap.new + key = Object.new + map[key] = nil + map.key?(key).should == true + end - it "returns false when a key cannot be garbage collected" do - map = ObjectSpace::WeakKeyMap.new + it "returns false when a key cannot be garbage collected" do + map = ObjectSpace::WeakKeyMap.new - map.key?(1).should == false - map.key?(1.0).should == false - map.key?(:a).should == false - map.key?(true).should == false - map.key?(false).should == false - map.key?(nil).should == 
false - end + map.key?(1).should == false + map.key?(1.0).should == false + map.key?(:a).should == false + map.key?(true).should == false + map.key?(false).should == false + map.key?(nil).should == false end end diff --git a/spec/ruby/core/objectspace/weakmap/delete_spec.rb b/spec/ruby/core/objectspace/weakmap/delete_spec.rb index 302de264fb2998..03beebbb83419d 100644 --- a/spec/ruby/core/objectspace/weakmap/delete_spec.rb +++ b/spec/ruby/core/objectspace/weakmap/delete_spec.rb @@ -1,30 +1,28 @@ require_relative '../../../spec_helper' -ruby_version_is '3.3' do - describe "ObjectSpace::WeakMap#delete" do - it "removes the entry and returns the deleted value" do - m = ObjectSpace::WeakMap.new - key = Object.new - value = Object.new - m[key] = value +describe "ObjectSpace::WeakMap#delete" do + it "removes the entry and returns the deleted value" do + m = ObjectSpace::WeakMap.new + key = Object.new + value = Object.new + m[key] = value - m.delete(key).should == value - m.key?(key).should == false - end + m.delete(key).should == value + m.key?(key).should == false + end - it "calls supplied block if the key is not found" do - key = Object.new - m = ObjectSpace::WeakMap.new - return_value = m.delete(key) do |yielded_key| - yielded_key.should == key - 5 - end - return_value.should == 5 + it "calls supplied block if the key is not found" do + key = Object.new + m = ObjectSpace::WeakMap.new + return_value = m.delete(key) do |yielded_key| + yielded_key.should == key + 5 end + return_value.should == 5 + end - it "returns nil if the key is not found when no block is given" do - m = ObjectSpace::WeakMap.new - m.delete(Object.new).should == nil - end + it "returns nil if the key is not found when no block is given" do + m = ObjectSpace::WeakMap.new + m.delete(Object.new).should == nil end end diff --git a/spec/ruby/core/proc/clone_spec.rb b/spec/ruby/core/proc/clone_spec.rb index 730dc421a87086..7d47f2cde5b4a5 100644 --- a/spec/ruby/core/proc/clone_spec.rb +++ 
b/spec/ruby/core/proc/clone_spec.rb @@ -5,7 +5,7 @@ describe "Proc#clone" do it_behaves_like :proc_dup, :clone - ruby_bug "cloning a frozen proc is broken on Ruby 3.3", "3.3"..."3.4" do + ruby_bug "cloning a frozen proc is broken on Ruby 3.3", ""..."3.4" do it "preserves frozen status" do proc = Proc.new { } proc.freeze @@ -14,17 +14,15 @@ end end - ruby_version_is "3.3" do - it "calls #initialize_clone on subclass" do - obj = ProcSpecs::MyProc2.new(:a, 2) { } - dup = obj.clone + it "calls #initialize_clone on subclass" do + obj = ProcSpecs::MyProc2.new(:a, 2) { } + dup = obj.clone - dup.should_not equal(obj) - dup.class.should == ProcSpecs::MyProc2 + dup.should_not equal(obj) + dup.class.should == ProcSpecs::MyProc2 - dup.first.should == :a - dup.second.should == 2 - dup.initializer.should == :clone - end + dup.first.should == :a + dup.second.should == 2 + dup.initializer.should == :clone end end diff --git a/spec/ruby/core/proc/dup_spec.rb b/spec/ruby/core/proc/dup_spec.rb index 716357d1f0e327..bdb7d8ab5a4e82 100644 --- a/spec/ruby/core/proc/dup_spec.rb +++ b/spec/ruby/core/proc/dup_spec.rb @@ -12,17 +12,15 @@ proc.dup.frozen?.should == false end - ruby_version_is "3.3" do - it "calls #initialize_dup on subclass" do - obj = ProcSpecs::MyProc2.new(:a, 2) { } - dup = obj.dup + it "calls #initialize_dup on subclass" do + obj = ProcSpecs::MyProc2.new(:a, 2) { } + dup = obj.dup - dup.should_not equal(obj) - dup.class.should == ProcSpecs::MyProc2 + dup.should_not equal(obj) + dup.class.should == ProcSpecs::MyProc2 - dup.first.should == :a - dup.second.should == 2 - dup.initializer.should == :dup - end + dup.first.should == :a + dup.second.should == 2 + dup.initializer.should == :dup end end diff --git a/spec/ruby/core/proc/lambda_spec.rb b/spec/ruby/core/proc/lambda_spec.rb index 5c3c38fc2a64c1..67ee4645cd1f0a 100644 --- a/spec/ruby/core/proc/lambda_spec.rb +++ b/spec/ruby/core/proc/lambda_spec.rb @@ -14,13 +14,6 @@ Proc.new {}.lambda?.should be_false end - 
ruby_version_is ""..."3.3" do - it "is preserved when passing a Proc with & to the lambda keyword" do - suppress_warning {lambda(&->{})}.lambda?.should be_true - suppress_warning {lambda(&proc{})}.lambda?.should be_false - end - end - it "is preserved when passing a Proc with & to the proc keyword" do proc(&->{}).lambda?.should be_true proc(&proc{}).lambda?.should be_false diff --git a/spec/ruby/core/process/argv0_spec.rb b/spec/ruby/core/process/argv0_spec.rb index f5aba719e96a73..9cba382c009da4 100644 --- a/spec/ruby/core/process/argv0_spec.rb +++ b/spec/ruby/core/process/argv0_spec.rb @@ -13,10 +13,8 @@ end end - ruby_bug "#19597", ""..."3.3" do - it "returns a frozen object" do - Process.argv0.should.frozen? - end + it "returns a frozen object" do + Process.argv0.should.frozen? end it "returns every time the same object" do diff --git a/spec/ruby/core/process/status/bit_and_spec.rb b/spec/ruby/core/process/status/bit_and_spec.rb index a80536462947f2..9fd1425a97600e 100644 --- a/spec/ruby/core/process/status/bit_and_spec.rb +++ b/spec/ruby/core/process/status/bit_and_spec.rb @@ -17,7 +17,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do it "raises an ArgumentError if mask is negative" do suppress_warning do ruby_exe("exit(0)") diff --git a/spec/ruby/core/process/status/right_shift_spec.rb b/spec/ruby/core/process/status/right_shift_spec.rb index 355aaf4c9532cb..3eaedf50550e1d 100644 --- a/spec/ruby/core/process/status/right_shift_spec.rb +++ b/spec/ruby/core/process/status/right_shift_spec.rb @@ -16,7 +16,7 @@ end end - ruby_version_is "3.3"..."4.0" do + ruby_version_is ""..."4.0" do it "raises an ArgumentError if shift value is negative" do suppress_warning do ruby_exe("exit(0)") diff --git a/spec/ruby/core/process/warmup_spec.rb b/spec/ruby/core/process/warmup_spec.rb index b562d52d226715..4530ae222c2606 100644 --- a/spec/ruby/core/process/warmup_spec.rb +++ b/spec/ruby/core/process/warmup_spec.rb @@ -1,11 +1,9 @@ require_relative 
'../../spec_helper' describe "Process.warmup" do - ruby_version_is "3.3" do - # The behavior is entirely implementation specific. - # Other implementations are free to just make it a noop - it "is implemented" do - Process.warmup.should == true - end + # The behavior is entirely implementation specific. + # Other implementations are free to just make it a noop + it "is implemented" do + Process.warmup.should == true end end diff --git a/spec/ruby/core/range/case_compare_spec.rb b/spec/ruby/core/range/case_compare_spec.rb index c9b253f0a585d9..7a76487d68c575 100644 --- a/spec/ruby/core/range/case_compare_spec.rb +++ b/spec/ruby/core/range/case_compare_spec.rb @@ -11,9 +11,7 @@ it_behaves_like :range_cover_and_include, :=== it_behaves_like :range_cover, :=== - ruby_bug "#19533", ""..."3.3" do - it "returns true on any value if begin and end are both nil" do - (nil..nil).should === 1 - end + it "returns true on any value if begin and end are both nil" do + (nil..nil).should === 1 end end diff --git a/spec/ruby/core/range/overlap_spec.rb b/spec/ruby/core/range/overlap_spec.rb index 9b6fc134934208..3e7d2bdda8acf9 100644 --- a/spec/ruby/core/range/overlap_spec.rb +++ b/spec/ruby/core/range/overlap_spec.rb @@ -1,89 +1,87 @@ require_relative '../../spec_helper' -ruby_version_is '3.3' do - describe "Range#overlap?" 
do - it "returns true if other Range overlaps self" do - (0..2).overlap?(1..3).should == true - (1..3).overlap?(0..2).should == true - (0..2).overlap?(0..2).should == true - (0..3).overlap?(1..2).should == true - (1..2).overlap?(0..3).should == true - - ('a'..'c').overlap?('b'..'d').should == true - end - - it "returns false if other Range does not overlap self" do - (0..2).overlap?(3..4).should == false - (0..2).overlap?(-4..-1).should == false - - ('a'..'c').overlap?('d'..'f').should == false - end - - it "raises TypeError when called with non-Range argument" do - -> { - (0..2).overlap?(1) - }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") - end - - it "returns true when beginningless and endless Ranges overlap" do - (0..2).overlap?(..3).should == true - (0..2).overlap?(..1).should == true - (0..2).overlap?(..0).should == true - - (..3).overlap?(0..2).should == true - (..1).overlap?(0..2).should == true - (..0).overlap?(0..2).should == true - - (0..2).overlap?(-1..).should == true - (0..2).overlap?(1..).should == true - (0..2).overlap?(2..).should == true - - (-1..).overlap?(0..2).should == true - (1..).overlap?(0..2).should == true - (2..).overlap?(0..2).should == true - - (0..).overlap?(2..).should == true - (..0).overlap?(..2).should == true - end - - it "returns false when beginningless and endless Ranges do not overlap" do - (0..2).overlap?(..-1).should == false - (0..2).overlap?(3..).should == false - - (..-1).overlap?(0..2).should == false - (3..).overlap?(0..2).should == false - end - - it "returns false when Ranges are not compatible" do - (0..2).overlap?('a'..'d').should == false - end - - it "return false when self is empty" do - (2..0).overlap?(1..3).should == false - (2...2).overlap?(1..3).should == false - (1...1).overlap?(1...1).should == false - (2..0).overlap?(2..0).should == false - - ('c'..'a').overlap?('b'..'d').should == false - ('a'...'a').overlap?('b'..'d').should == false - 
('b'...'b').overlap?('b'...'b').should == false - ('c'...'a').overlap?('c'...'a').should == false - end - - it "return false when other Range is empty" do - (1..3).overlap?(2..0).should == false - (1..3).overlap?(2...2).should == false - - ('b'..'d').overlap?('c'..'a').should == false - ('b'..'d').overlap?('c'...'c').should == false - end - - it "takes into account exclusive end" do - (0...2).overlap?(2..4).should == false - (2..4).overlap?(0...2).should == false - - ('a'...'c').overlap?('c'..'e').should == false - ('c'..'e').overlap?('a'...'c').should == false - end +describe "Range#overlap?" do + it "returns true if other Range overlaps self" do + (0..2).overlap?(1..3).should == true + (1..3).overlap?(0..2).should == true + (0..2).overlap?(0..2).should == true + (0..3).overlap?(1..2).should == true + (1..2).overlap?(0..3).should == true + + ('a'..'c').overlap?('b'..'d').should == true + end + + it "returns false if other Range does not overlap self" do + (0..2).overlap?(3..4).should == false + (0..2).overlap?(-4..-1).should == false + + ('a'..'c').overlap?('d'..'f').should == false + end + + it "raises TypeError when called with non-Range argument" do + -> { + (0..2).overlap?(1) + }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") + end + + it "returns true when beginningless and endless Ranges overlap" do + (0..2).overlap?(..3).should == true + (0..2).overlap?(..1).should == true + (0..2).overlap?(..0).should == true + + (..3).overlap?(0..2).should == true + (..1).overlap?(0..2).should == true + (..0).overlap?(0..2).should == true + + (0..2).overlap?(-1..).should == true + (0..2).overlap?(1..).should == true + (0..2).overlap?(2..).should == true + + (-1..).overlap?(0..2).should == true + (1..).overlap?(0..2).should == true + (2..).overlap?(0..2).should == true + + (0..).overlap?(2..).should == true + (..0).overlap?(..2).should == true + end + + it "returns false when beginningless and endless Ranges do not overlap" do + 
(0..2).overlap?(..-1).should == false + (0..2).overlap?(3..).should == false + + (..-1).overlap?(0..2).should == false + (3..).overlap?(0..2).should == false + end + + it "returns false when Ranges are not compatible" do + (0..2).overlap?('a'..'d').should == false + end + + it "return false when self is empty" do + (2..0).overlap?(1..3).should == false + (2...2).overlap?(1..3).should == false + (1...1).overlap?(1...1).should == false + (2..0).overlap?(2..0).should == false + + ('c'..'a').overlap?('b'..'d').should == false + ('a'...'a').overlap?('b'..'d').should == false + ('b'...'b').overlap?('b'...'b').should == false + ('c'...'a').overlap?('c'...'a').should == false + end + + it "return false when other Range is empty" do + (1..3).overlap?(2..0).should == false + (1..3).overlap?(2...2).should == false + + ('b'..'d').overlap?('c'..'a').should == false + ('b'..'d').overlap?('c'...'c').should == false + end + + it "takes into account exclusive end" do + (0...2).overlap?(2..4).should == false + (2..4).overlap?(0...2).should == false + + ('a'...'c').overlap?('c'..'e').should == false + ('c'..'e').overlap?('a'...'c').should == false end end diff --git a/spec/ruby/core/range/reverse_each_spec.rb b/spec/ruby/core/range/reverse_each_spec.rb index 56390cc0da4822..16aaace6afaa60 100644 --- a/spec/ruby/core/range/reverse_each_spec.rb +++ b/spec/ruby/core/range/reverse_each_spec.rb @@ -1,102 +1,124 @@ require_relative '../../spec_helper' -ruby_version_is "3.3" do - describe "Range#reverse_each" do - it "traverses the Range in reverse order and passes each element to block" do - a = [] - (1..3).reverse_each { |i| a << i } - a.should == [3, 2, 1] +describe "Range#reverse_each" do + it "traverses the Range in reverse order and passes each element to block" do + a = [] + (1..3).reverse_each { |i| a << i } + a.should == [3, 2, 1] + + a = [] + (1...3).reverse_each { |i| a << i } + a.should == [2, 1] + end - a = [] - (1...3).reverse_each { |i| a << i } - a.should == [2, 1] - end + 
it "returns self" do + r = (1..3) + r.reverse_each { |x| }.should equal(r) + end - it "returns self" do - r = (1..3) - r.reverse_each { |x| }.should equal(r) - end + it "returns an Enumerator if no block given" do + enum = (1..3).reverse_each + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == [3, 2, 1] + end - it "returns an Enumerator if no block given" do - enum = (1..3).reverse_each - enum.should be_an_instance_of(Enumerator) - enum.to_a.should == [3, 2, 1] - end + it "raises a TypeError for endless Ranges of Integers" do + -> { + (1..).reverse_each.take(3) + }.should raise_error(TypeError, "can't iterate from NilClass") + end - it "raises a TypeError for endless Ranges of Integers" do - -> { - (1..).reverse_each.take(3) - }.should raise_error(TypeError, "can't iterate from NilClass") - end + it "raises a TypeError for endless Ranges of non-Integers" do + -> { + ("a"..).reverse_each.take(3) + }.should raise_error(TypeError, "can't iterate from NilClass") + end - it "raises a TypeError for endless Ranges of non-Integers" do - -> { - ("a"..).reverse_each.take(3) - }.should raise_error(TypeError, "can't iterate from NilClass") + context "Integer boundaries" do + it "supports beginningless Ranges" do + (..5).reverse_each.take(3).should == [5, 4, 3] end + end - context "Integer boundaries" do - it "supports beginningless Ranges" do - (..5).reverse_each.take(3).should == [5, 4, 3] - end + context "non-Integer boundaries" do + it "uses #succ to iterate a Range of non-Integer elements" do + y = mock('y') + x = mock('x') + + x.should_receive(:succ).any_number_of_times.and_return(y) + x.should_receive(:<=>).with(y).any_number_of_times.and_return(-1) + x.should_receive(:<=>).with(x).any_number_of_times.and_return(0) + y.should_receive(:<=>).with(x).any_number_of_times.and_return(1) + y.should_receive(:<=>).with(y).any_number_of_times.and_return(0) + + a = [] + (x..y).each { |i| a << i } + a.should == [x, y] end - context "non-Integer boundaries" do - it 
"uses #succ to iterate a Range of non-Integer elements" do - y = mock('y') - x = mock('x') + it "uses #succ to iterate a Range of Strings" do + a = [] + ('A'..'D').reverse_each { |i| a << i } + a.should == ['D','C','B','A'] + end - x.should_receive(:succ).any_number_of_times.and_return(y) - x.should_receive(:<=>).with(y).any_number_of_times.and_return(-1) - x.should_receive(:<=>).with(x).any_number_of_times.and_return(0) - y.should_receive(:<=>).with(x).any_number_of_times.and_return(1) - y.should_receive(:<=>).with(y).any_number_of_times.and_return(0) + it "uses #succ to iterate a Range of Symbols" do + a = [] + (:A..:D).reverse_each { |i| a << i } + a.should == [:D, :C, :B, :A] + end - a = [] - (x..y).each { |i| a << i } - a.should == [x, y] - end + it "raises a TypeError when `begin` value does not respond to #succ" do + -> { (Time.now..Time.now).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Time/) + -> { (//..//).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Regexp/) + -> { ([]..[]).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Array/) + end - it "uses #succ to iterate a Range of Strings" do - a = [] - ('A'..'D').reverse_each { |i| a << i } - a.should == ['D','C','B','A'] - end + it "does not support beginningless Ranges" do + -> { + (..'a').reverse_each { |x| x } + }.should raise_error(TypeError, /can't iterate from NilClass/) + end + end - it "uses #succ to iterate a Range of Symbols" do - a = [] - (:A..:D).reverse_each { |i| a << i } - a.should == [:D, :C, :B, :A] - end + context "when no block is given" do + describe "returned Enumerator size" do + it "returns the Range size when Range size is finite" do + (1..3).reverse_each.size.should == 3 + (1...3).reverse_each.size.should == 2 - it "raises a TypeError when `begin` value does not respond to #succ" do - -> { (Time.now..Time.now).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Time/) - -> { 
(//..//).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Regexp/) - -> { ([]..[]).reverse_each { |x| x } }.should raise_error(TypeError, /can't iterate from Array/) + (1..3.3).reverse_each.size.should == 3 + (1...3.3).reverse_each.size.should == 3 end - it "does not support beginningless Ranges" do - -> { - (..'a').reverse_each { |x| x } - }.should raise_error(TypeError, /can't iterate from NilClass/) + ruby_version_is ""..."3.4" do + it "returns a size when it is not iterable" do + (1.1..3).reverse_each.size.should == 2 + (1.1..3.3).reverse_each.size.should == 3 + (1.1..nil).reverse_each.size.should == Float::INFINITY + (nil..3.3).reverse_each.size.should == Float::INFINITY + (nil..nil).reverse_each.size.should == nil + end end - end - context "when no block is given" do - describe "returned Enumerator size" do - it "returns the Range size when Range size is finite" do - (1..3).reverse_each.size.should == 3 + ruby_version_is "3.4" do + it "raises TypeError when the range is not iterable" do + -> { (1.1..3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Integer/) + -> { (1.1..3.3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Float/) + -> { (1.1..nil).reverse_each.size }.should raise_error(TypeError, /can't iterate from NilClass/) + -> { (nil..3.3).reverse_each.size }.should raise_error(TypeError, /can't iterate from Float/) + -> { (nil..nil).reverse_each.size }.should raise_error(TypeError, /can't iterate from NilClass/) end + end - ruby_bug "#20936", "3.4"..."4.0" do - it "returns Infinity when Range size is infinite" do - (..3).reverse_each.size.should == Float::INFINITY - end + ruby_bug "#20936", "3.4"..."4.0" do + it "returns Infinity when Range size is infinite" do + (..3).reverse_each.size.should == Float::INFINITY end + end - it "returns nil when Range size is unknown" do - ('a'..'z').reverse_each.size.should == nil - end + it "returns nil when Range size is unknown" do + 
('a'..'z').reverse_each.size.should == nil end end end diff --git a/spec/ruby/core/range/to_set_spec.rb b/spec/ruby/core/range/to_set_spec.rb index 589c0e9aedec26..14e0ce1e31eac1 100644 --- a/spec/ruby/core/range/to_set_spec.rb +++ b/spec/ruby/core/range/to_set_spec.rb @@ -1,7 +1,7 @@ require_relative '../../spec_helper' require_relative '../enumerable/fixtures/classes' -describe "Enumerable#to_set" do +describe "Range#to_set" do it "returns a new Set created from self" do (1..4).to_set.should == Set[1, 2, 3, 4] (1...4).to_set.should == Set[1, 2, 3] @@ -11,45 +11,44 @@ (1..3).to_set { |x| x * x }.should == Set[1, 4, 9] end + it "raises a TypeError for a beginningless range" do + -> { + (..0).to_set + }.should raise_error(TypeError, "can't iterate from NilClass") + end + ruby_version_is "4.0" do - it "raises a RangeError if the range is infinite" do + it "raises a RangeError if the range is endless" do -> { (1..).to_set }.should raise_error(RangeError, "cannot convert endless range to a set") -> { (1...).to_set }.should raise_error(RangeError, "cannot convert endless range to a set") end end - ruby_version_is ""..."4.0" do - it "instantiates an object of provided as the first argument set class" do - set = (1..3).to_set(EnumerableSpecs::SetSubclass) - set.should be_kind_of(EnumerableSpecs::SetSubclass) - set.to_a.sort.should == [1, 2, 3] - end - end - - ruby_version_is "4.0"..."4.1" do - it "instantiates an object of provided as the first argument set class and warns" do - set = nil - proc { + context "given positional arguments" do + ruby_version_is ""..."4.0" do + it "instantiates an object of provided as the first argument set class" do set = (1..3).to_set(EnumerableSpecs::SetSubclass) - }.should complain(/Enumerable#to_set/) - set.should be_kind_of(EnumerableSpecs::SetSubclass) - set.to_a.sort.should == [1, 2, 3] + set.should be_kind_of(EnumerableSpecs::SetSubclass) + set.to_a.sort.should == [1, 2, 3] + end end - end - ruby_version_is "4.1" do - it "does not 
accept any positional argument" do - -> { - (1..3).to_set(EnumerableSpecs::SetSubclass) - }.should raise_error(ArgumentError, 'wrong number of arguments (given 1, expected 0)') + ruby_version_is "4.0"..."4.1" do + it "instantiates an object of provided as the first argument set class and warns" do + -> { + set = (1..3).to_set(EnumerableSpecs::SetSubclass) + set.should be_kind_of(EnumerableSpecs::SetSubclass) + set.to_a.sort.should == [1, 2, 3] + }.should complain(/warning: passing arguments to Enumerable#to_set is deprecated/) + end end - end - it "does not need explicit `require 'set'`" do - output = ruby_exe(<<~RUBY, options: '--disable-gems', args: '2>&1') - puts (1..3).to_set.to_a.inspect - RUBY - - output.chomp.should == "[1, 2, 3]" + ruby_version_is "4.1" do + it "does not accept any positional argument" do + -> { + (1..3).to_set(EnumerableSpecs::SetSubclass) + }.should raise_error(ArgumentError, "wrong number of arguments (given 1, expected 0)") + end + end end end diff --git a/spec/ruby/core/rational/ceil_spec.rb b/spec/ruby/core/rational/ceil_spec.rb index d5bdadf3b6b000..0c0327448f35c6 100644 --- a/spec/ruby/core/rational/ceil_spec.rb +++ b/spec/ruby/core/rational/ceil_spec.rb @@ -1,45 +1,48 @@ require_relative "../../spec_helper" +require_relative "../integer/shared/integer_ceil_precision" describe "Rational#ceil" do + context "with values equal to integers" do + it_behaves_like :integer_ceil_precision, :Rational + end + before do @rational = Rational(2200, 7) end describe "with no arguments (precision = 0)" do - it "returns an Integer" do - @rational.ceil.should be_kind_of(Integer) - end + it "returns the Integer value rounded toward positive infinity" do + @rational.ceil.should eql 315 - it "returns the truncated value toward positive infinity" do - @rational.ceil.should == 315 - Rational(1, 2).ceil.should == 1 - Rational(-1, 2).ceil.should == 0 + Rational(1, 2).ceil.should eql 1 + Rational(-1, 2).ceil.should eql 0 + Rational(1, 1).ceil.should eql 1 
end end describe "with a precision < 0" do - it "returns an Integer" do - @rational.ceil(-2).should be_kind_of(Integer) - @rational.ceil(-1).should be_kind_of(Integer) - end + it "moves the rounding point n decimal places left, returning an Integer" do + @rational.ceil(-3).should eql 1000 + @rational.ceil(-2).should eql 400 + @rational.ceil(-1).should eql 320 - it "moves the truncation point n decimal places left" do - @rational.ceil(-3).should == 1000 - @rational.ceil(-2).should == 400 - @rational.ceil(-1).should == 320 + Rational(100, 2).ceil(-1).should eql 50 + Rational(100, 2).ceil(-2).should eql 100 + Rational(-100, 2).ceil(-1).should eql(-50) + Rational(-100, 2).ceil(-2).should eql(0) end end describe "with precision > 0" do - it "returns a Rational" do - @rational.ceil(1).should be_kind_of(Rational) - @rational.ceil(2).should be_kind_of(Rational) - end + it "moves the rounding point n decimal places right, returning a Rational" do + @rational.ceil(1).should eql Rational(3143, 10) + @rational.ceil(2).should eql Rational(31429, 100) + @rational.ceil(3).should eql Rational(157143, 500) - it "moves the truncation point n decimal places right" do - @rational.ceil(1).should == Rational(3143, 10) - @rational.ceil(2).should == Rational(31429, 100) - @rational.ceil(3).should == Rational(157143, 500) + Rational(100, 2).ceil(1).should eql Rational(50, 1) + Rational(100, 2).ceil(2).should eql Rational(50, 1) + Rational(-100, 2).ceil(1).should eql Rational(-50, 1) + Rational(-100, 2).ceil(2).should eql Rational(-50, 1) end end end diff --git a/spec/ruby/core/rational/exponent_spec.rb b/spec/ruby/core/rational/exponent_spec.rb index 65fbf2ed1ca895..1f8a03740cc087 100644 --- a/spec/ruby/core/rational/exponent_spec.rb +++ b/spec/ruby/core/rational/exponent_spec.rb @@ -108,37 +108,37 @@ it "raises an ArgumentError when self is > 1" do -> { (Rational(2) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { 
(Rational(fixnum_max) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is > 1 and the exponent is negative" do -> { (Rational(2) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_max) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is < -1" do -> { (Rational(-2) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_min) ** bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end it "raises an ArgumentError when self is < -1 and the exponent is negative" do -> { (Rational(-2) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") -> { (Rational(fixnum_min) ** -bignum_value) - }.should raise_error(ArgumentError) + }.should raise_error(ArgumentError, "exponent is too large") end end diff --git a/spec/ruby/core/rational/floor_spec.rb b/spec/ruby/core/rational/floor_spec.rb index 8068aaf119e70f..5108e363f7a67c 100644 --- a/spec/ruby/core/rational/floor_spec.rb +++ b/spec/ruby/core/rational/floor_spec.rb @@ -1,45 +1,49 @@ require_relative "../../spec_helper" +require_relative "../integer/shared/integer_floor_precision" describe "Rational#floor" do + context "with values equal to integers" do + it_behaves_like :integer_floor_precision, :Rational + end + before do @rational = Rational(2200, 7) end describe "with no arguments (precision = 0)" do - it "returns an integer" do - @rational.floor.should be_kind_of(Integer) - end - it "returns the truncated value toward negative infinity" do - @rational.floor.should == 314 - Rational(1, 2).floor.should == 0 - 
Rational(-1, 2).floor.should == -1 + it "returns the Integer value rounded toward negative infinity" do + @rational.floor.should eql 314 + + Rational(1, 2).floor.should eql 0 + Rational(-1, 2).floor.should eql(-1) + Rational(1, 1).floor.should eql 1 end end describe "with a precision < 0" do - it "returns an integer" do - @rational.floor(-2).should be_kind_of(Integer) - @rational.floor(-1).should be_kind_of(Integer) - end + it "moves the rounding point n decimal places left, returning an Integer" do + @rational.floor(-3).should eql 0 + @rational.floor(-2).should eql 300 + @rational.floor(-1).should eql 310 - it "moves the truncation point n decimal places left" do - @rational.floor(-3).should == 0 - @rational.floor(-2).should == 300 - @rational.floor(-1).should == 310 + Rational(100, 2).floor(-1).should eql 50 + Rational(100, 2).floor(-2).should eql 0 + Rational(-100, 2).floor(-1).should eql(-50) + Rational(-100, 2).floor(-2).should eql(-100) end end describe "with a precision > 0" do - it "returns a Rational" do - @rational.floor(1).should be_kind_of(Rational) - @rational.floor(2).should be_kind_of(Rational) - end + it "moves the rounding point n decimal places right, returning a Rational" do + @rational.floor(1).should eql Rational(1571, 5) + @rational.floor(2).should eql Rational(7857, 25) + @rational.floor(3).should eql Rational(62857, 200) - it "moves the truncation point n decimal places right" do - @rational.floor(1).should == Rational(1571, 5) - @rational.floor(2).should == Rational(7857, 25) - @rational.floor(3).should == Rational(62857, 200) + Rational(100, 2).floor(1).should eql Rational(50, 1) + Rational(100, 2).floor(2).should eql Rational(50, 1) + Rational(-100, 2).floor(1).should eql Rational(-50, 1) + Rational(-100, 2).floor(2).should eql Rational(-50, 1) end end end diff --git a/spec/ruby/core/refinement/refined_class_spec.rb b/spec/ruby/core/refinement/refined_class_spec.rb index 60a58380ccf00b..b532d9a7738cca 100644 --- 
a/spec/ruby/core/refinement/refined_class_spec.rb +++ b/spec/ruby/core/refinement/refined_class_spec.rb @@ -2,11 +2,7 @@ require_relative 'shared/target' describe "Refinement#refined_class" do - ruby_version_is ""..."3.3" do - it_behaves_like :refinement_target, :refined_class - end - - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "has been deprecated in favour of Refinement#target" do refinement_int = nil diff --git a/spec/ruby/core/refinement/target_spec.rb b/spec/ruby/core/refinement/target_spec.rb index fee9588a96ed65..8bd816aea622dd 100644 --- a/spec/ruby/core/refinement/target_spec.rb +++ b/spec/ruby/core/refinement/target_spec.rb @@ -2,7 +2,5 @@ require_relative 'shared/target' describe "Refinement#target" do - ruby_version_is "3.3" do - it_behaves_like :refinement_target, :target - end + it_behaves_like :refinement_target, :target end diff --git a/spec/ruby/core/regexp/linear_time_spec.rb b/spec/ruby/core/regexp/linear_time_spec.rb index cf9e73c37c2b64..2f3f81ed207236 100644 --- a/spec/ruby/core/regexp/linear_time_spec.rb +++ b/spec/ruby/core/regexp/linear_time_spec.rb @@ -25,9 +25,7 @@ }.should complain(/warning: flags ignored/) end - ruby_version_is "3.3" do - it "returns true for positive lookarounds" do - Regexp.linear_time?(/(?:(?=a*)a)*/).should == true - end + it "returns true for positive lookarounds" do + Regexp.linear_time?(/(?:(?=a*)a)*/).should == true end end diff --git a/spec/ruby/core/set/flatten_spec.rb b/spec/ruby/core/set/flatten_spec.rb index f2cb3dfa524a35..b26bc8481af58f 100644 --- a/spec/ruby/core/set/flatten_spec.rb +++ b/spec/ruby/core/set/flatten_spec.rb @@ -46,14 +46,4 @@ (set = Set[]) << set -> { set.flatten! 
}.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when Set contains a Set-like object" do - it "flattens self, including Set-like objects" do - Set[SetSpecs::SetLike.new([1])].flatten!.should == Set[1] - end - end - end - end end diff --git a/spec/ruby/core/set/merge_spec.rb b/spec/ruby/core/set/merge_spec.rb index 0c6ed276700e7d..bf945cdcc02238 100644 --- a/spec/ruby/core/set/merge_spec.rb +++ b/spec/ruby/core/set/merge_spec.rb @@ -23,15 +23,7 @@ end end - ruby_version_is ""..."3.3" do - it "accepts only a single argument" do - -> { Set[].merge([], []) }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") - end - end - - ruby_version_is "3.3" do - it "accepts multiple arguments" do - Set[:a, :b].merge(Set[:b, :c], [:d]).should == Set[:a, :b, :c, :d] - end + it "accepts multiple arguments" do + Set[:a, :b].merge(Set[:b, :c], [:d]).should == Set[:a, :b, :c, :d] end end diff --git a/spec/ruby/core/set/proper_subset_spec.rb b/spec/ruby/core/set/proper_subset_spec.rb index fb7848c0015200..6f99447019b852 100644 --- a/spec/ruby/core/set/proper_subset_spec.rb +++ b/spec/ruby/core/set/proper_subset_spec.rb @@ -32,14 +32,4 @@ -> { Set[].proper_subset?("test") }.should raise_error(ArgumentError) -> { Set[].proper_subset?(Object.new) }.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a proper subset of" do - Set[1, 2, 3].proper_subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true - end - end - end - end end diff --git a/spec/ruby/core/set/subset_spec.rb b/spec/ruby/core/set/subset_spec.rb index 112bd9b38adc12..da80d174da4fa1 100644 --- a/spec/ruby/core/set/subset_spec.rb +++ b/spec/ruby/core/set/subset_spec.rb @@ -32,14 +32,4 
@@ -> { Set[].subset?("test") }.should raise_error(ArgumentError) -> { Set[].subset?(Object.new) }.should raise_error(ArgumentError) end - - version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - ruby_version_is ""..."4.0" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a subset of" do - Set[1, 2, 3].subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true - end - end - end - end end diff --git a/spec/ruby/core/string/append_as_bytes_spec.rb b/spec/ruby/core/string/append_as_bytes_spec.rb index b1703e5f89baf9..def663d5ce2239 100644 --- a/spec/ruby/core/string/append_as_bytes_spec.rb +++ b/spec/ruby/core/string/append_as_bytes_spec.rb @@ -7,14 +7,16 @@ -> { str.append_as_bytes("\xE2\x82") }.should raise_error(FrozenError) end - it "allows creating broken strings" do + it "allows creating broken strings in UTF8" do str = +"hello" str.append_as_bytes("\xE2\x82") str.valid_encoding?.should == false str.append_as_bytes("\xAC") str.valid_encoding?.should == true + end + it "allows creating broken strings in UTF_32" do str = "abc".encode(Encoding::UTF_32LE) str.append_as_bytes("def") str.encoding.should == Encoding::UTF_32LE diff --git a/spec/ruby/core/string/bytesplice_spec.rb b/spec/ruby/core/string/bytesplice_spec.rb index 2c770e340aad27..cfd9e3ea9a7f39 100644 --- a/spec/ruby/core/string/bytesplice_spec.rb +++ b/spec/ruby/core/string/bytesplice_spec.rb @@ -57,77 +57,75 @@ -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") end - ruby_version_is "3.3" do - it "raises IndexError when str_index is less than -bytesize" do - -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") - end - - it "raises IndexError when str_index is greater than bytesize" do - -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") - end - - it "raises 
IndexError for negative str length" do - -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") - end - - it "replaces with integer str indices" do - "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" - "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" - "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" - "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" - "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" - end - - it "raises RangeError when str range left boundary is less than -bytesize" do - -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") - end - - it "replaces with str ranges" do - "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" - "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" - "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" - "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" - "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" - "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" - "hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo" - "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" - end - - it "raises ArgumentError when integer str index is provided without str length argument" do - -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") - end - - it "replaces on an empty string with str index/length" do - "".bytesplice(0, 0, "", 0, 0).should == "" - "".bytesplice(0, 0, "xxx", 0, 1).should == "x" - end - - it "mutates self with substring and str index/length" do - s = "hello" - s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) - s.should.eql?("hexxlo") - end - - it "raises when string is frozen and str index/length" do - s = "hello".freeze - -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") - end - - it 
"replaces on an empty string with str range" do - "".bytesplice(0..0, "", 0..0).should == "" - "".bytesplice(0..0, "xyz", 0..1).should == "xy" - end - - it "mutates self with substring and str range" do - s = "hello" - s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) - s.should.eql?("heyzlo") - end - - it "raises when string is frozen and str range" do - s = "hello".freeze - -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") - end + it "raises IndexError when str_index is less than -bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when str_index is greater than bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for negative str length" do + -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") + end + + it "replaces with integer str indices" do + "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" + "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" + "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" + end + + it "raises RangeError when str range left boundary is less than -bytesize" do + -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with str ranges" do + "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" + "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 
0...5).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" + end + + it "raises ArgumentError when integer str index is provided without str length argument" do + -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") + end + + it "replaces on an empty string with str index/length" do + "".bytesplice(0, 0, "", 0, 0).should == "" + "".bytesplice(0, 0, "xxx", 0, 1).should == "x" + end + + it "mutates self with substring and str index/length" do + s = "hello" + s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) + s.should.eql?("hexxlo") + end + + it "raises when string is frozen and str index/length" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + it "replaces on an empty string with str range" do + "".bytesplice(0..0, "", 0..0).should == "" + "".bytesplice(0..0, "xyz", 0..1).should == "xy" + end + + it "mutates self with substring and str range" do + s = "hello" + s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) + s.should.eql?("heyzlo") + end + + it "raises when string is frozen and str range" do + s = "hello".freeze + -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") end end @@ -201,94 +199,92 @@ result.encoding.should == Encoding::UTF_8 end - ruby_version_is "3.3" do - it "raises IndexError when str_index is out of byte size boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") - end - - it "raises IndexError when str_index is not on a codepoint boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") - end - - it "raises IndexError when str_length is not matching the codepoint boundary" do - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should 
raise_error(IndexError, "offset 1 does not land on character boundary") - -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") - end - - it "replaces with integer str indices" do - "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" - "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" - "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" - end - - it "replaces with str range" do - "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" - end - - it "treats negative length for str range as 0" do - "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" - "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" - end - - it "raises when ranges not match codepoint boundaries in str" do - -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") - # Begin is incorrect - -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") - # End is incorrect - -> { 
"こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") - -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") - end - - it "deals with a different encoded argument with str index/length" do - s = "こんにちは" - s.encoding.should == Encoding::UTF_8 - sub = "goodbye" - sub.force_encoding(Encoding::US_ASCII) - - result = s.bytesplice(3, 3, sub, 0, 3) - result.should == "こgooにちは" - result.encoding.should == Encoding::UTF_8 - - s = "hello" - s.force_encoding(Encoding::US_ASCII) - sub = "こんにちは" - sub.encoding.should == Encoding::UTF_8 - - result = s.bytesplice(1, 2, sub, 3, 3) - result.should == "hんlo" - result.encoding.should == Encoding::UTF_8 - end - - it "deals with a different encoded argument with str range" do - s = "こんにちは" - s.encoding.should == Encoding::UTF_8 - sub = "goodbye" - sub.force_encoding(Encoding::US_ASCII) - - result = s.bytesplice(3..5, sub, 0..2) - result.should == "こgooにちは" - result.encoding.should == Encoding::UTF_8 - - s = "hello" - s.force_encoding(Encoding::US_ASCII) - sub = "こんにちは" - sub.encoding.should == Encoding::UTF_8 - - result = s.bytesplice(1..2, sub, 3..5) - result.should == "hんlo" - result.encoding.should == Encoding::UTF_8 - end + it "raises IndexError when str_index is out of byte size boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when str_index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when str_length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should 
raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer str indices" do + "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" + "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" + end + + it "replaces with str range" do + "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" + end + + it "treats negative length for str range as 0" do + "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" + end + + it "raises when ranges not match codepoint boundaries in str" do + -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { 
"こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument with str index/length" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3, 3, sub, 0, 3) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1, 2, sub, 3, 3) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + + it "deals with a different encoded argument with str range" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3..5, sub, 0..2) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1..2, sub, 3..5) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 end end diff --git a/spec/ruby/core/string/index_spec.rb b/spec/ruby/core/string/index_spec.rb index 835263a2cd58c2..01e6a6a4009720 100644 --- a/spec/ruby/core/string/index_spec.rb +++ b/spec/ruby/core/string/index_spec.rb @@ -231,15 +231,13 @@ $~.should == nil end - ruby_bug "#20421", ""..."3.3" do - it "always clear $~" do - "a".index(/a/) - $~.should_not == nil - - string = "blablabla" - string.index(/bla/, string.length + 1) - $~.should == nil - end + it "always clear $~" do + "a".index(/a/) + $~.should_not == nil + + string = "blablabla" + string.index(/bla/, string.length + 1) + $~.should == nil end it "starts the search at the given offset" do @@ -330,21 +328,10 @@ "われわわれ".index(/わ/, 3).should == 3 end - ruby_bug "#19763", ""..."3.3.0" do - it "raises an Encoding::CompatibilityError if the 
encodings are incompatible" do - re = Regexp.new "れ".encode(Encoding::EUC_JP) - -> do - "あれ".index re - end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") - end - end - - # The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0 - # Still test that the right exception class is used before that. it "raises an Encoding::CompatibilityError if the encodings are incompatible" do re = Regexp.new "れ".encode(Encoding::EUC_JP) -> do "あれ".index re - end.should raise_error(Encoding::CompatibilityError) + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") end end diff --git a/spec/ruby/core/string/shared/chars.rb b/spec/ruby/core/string/shared/chars.rb index c730643cf49874..74a32fb513f024 100644 --- a/spec/ruby/core/string/shared/chars.rb +++ b/spec/ruby/core/string/shared/chars.rb @@ -14,7 +14,6 @@ s.send(@method){}.should equal(s) end - it "is unicode aware" do "\303\207\342\210\202\303\251\306\222g".send(@method).to_a.should == ["\303\207", "\342\210\202", "\303\251", "\306\222", "g"] @@ -63,4 +62,25 @@ [0xA2].pack('C').force_encoding('SJIS') ] end + + it "returns individual chars for dummy encodings" do + "ab".dup.force_encoding(Encoding::UTF_7).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_7), + "\x62".dup.force_encoding(Encoding::UTF_7) + ] + + "abcd".dup.force_encoding(Encoding::UTF_16).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_16), + "\x62".dup.force_encoding(Encoding::UTF_16), + "\x63".dup.force_encoding(Encoding::UTF_16), + "\x64".dup.force_encoding(Encoding::UTF_16) + ] + + "abcd".dup.force_encoding(Encoding::UTF_32).send(@method).to_a.should == [ + "\x61".dup.force_encoding(Encoding::UTF_32), + "\x62".dup.force_encoding(Encoding::UTF_32), + "\x63".dup.force_encoding(Encoding::UTF_32), + 
"\x64".dup.force_encoding(Encoding::UTF_32) + ] + end end diff --git a/spec/ruby/core/string/shared/codepoints.rb b/spec/ruby/core/string/shared/codepoints.rb index 1c28ba3d5e22ff..ecdf7d719db553 100644 --- a/spec/ruby/core/string/shared/codepoints.rb +++ b/spec/ruby/core/string/shared/codepoints.rb @@ -59,4 +59,9 @@ s.ascii_only?.should be_true s.send(@method).to_a.should == s.bytes.to_a end + + it "returns individual bytes for dummy encodings UTF-16 and UTF-32" do + "abcd".dup.force_encoding(Encoding::UTF_16).send(@method).to_a.should == [97, 98, 99, 100] + "abcd".dup.force_encoding(Encoding::UTF_32).send(@method).to_a.should == [97, 98, 99, 100] + end end diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb index 231a6d9d4ff3a2..c2f3abfa80e0a6 100644 --- a/spec/ruby/core/string/shared/each_line.rb +++ b/spec/ruby/core/string/shared/each_line.rb @@ -159,4 +159,18 @@ a.should == ["hello\r\n", "world\r\n"] end end + + it "does not split lines for dummy UTF-16" do + "a\nb".encode(Encoding::UTF_16).lines.should == [ + "\xFE\xFF\x00\x61\x00\n\x00\x62".dup.force_encoding(Encoding::UTF_16) + ] + + str = "\x00\n\n\x00".dup.force_encoding(Encoding::UTF_16) + str.lines.should == [str] + end + + it "raises Encoding::ConverterNotFoundError for dummy UTF-7" do + str = "a\nb".dup.force_encoding(Encoding::UTF_7) + -> { str.lines }.should raise_error(Encoding::ConverterNotFoundError) + end end diff --git a/spec/ruby/core/string/shared/grapheme_clusters.rb b/spec/ruby/core/string/shared/grapheme_clusters.rb index 8b666868b1df68..985b558f08a03e 100644 --- a/spec/ruby/core/string/shared/grapheme_clusters.rb +++ b/spec/ruby/core/string/shared/grapheme_clusters.rb @@ -9,6 +9,15 @@ a.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"] end + it "returns grapheme clusters for various UTF encodings" do + [Encoding::UTF_16LE, Encoding::UTF_16BE, Encoding::UTF_32LE, Encoding::UTF_32BE].each do |enc| + a = [] + # test string: 
abc[rainbow flag emoji][paw prints] + "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}".encode(enc).send(@method) { |c| a << c } + a.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"].map { |s| s.encode(enc) } + end + end + it "returns self" do s = StringSpecs::MyString.new "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}" s.send(@method) {}.should equal(s) diff --git a/spec/ruby/core/string/start_with_spec.rb b/spec/ruby/core/string/start_with_spec.rb index 35e33b46a668ee..8b0ba6b5a7ec3b 100644 --- a/spec/ruby/core/string/start_with_spec.rb +++ b/spec/ruby/core/string/start_with_spec.rb @@ -11,17 +11,8 @@ "\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8 end - ruby_version_is ""..."3.3" do - it "does not check we are matching only part of a character" do - "\xe3\x81\x82".size.should == 1 - "\xe3\x81\x82".should.start_with?("\xe3") - end - end - - ruby_version_is "3.3" do # #19784 - it "checks we are matching only part of a character" do - "\xe3\x81\x82".size.should == 1 - "\xe3\x81\x82".should_not.start_with?("\xe3") - end + it "checks we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should_not.start_with?("\xe3") end end diff --git a/spec/ruby/core/string/tr_s_spec.rb b/spec/ruby/core/string/tr_s_spec.rb index dd72da440c93d5..693ff8ace21bb5 100644 --- a/spec/ruby/core/string/tr_s_spec.rb +++ b/spec/ruby/core/string/tr_s_spec.rb @@ -18,13 +18,11 @@ "hello ^--^".tr_s("---", "_").should == "hello ^_^" end - ruby_bug "#19769", ""..."3.3" do - it "accepts c1-c1 notation to denote range of one character" do - "hello".tr_s('e-e', 'x').should == "hxllo" - "123456789".tr_s("2-23","xy").should == "1xy456789" - "hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" - "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" - end + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr_s('e-e', 'x').should == "hxllo" + "123456789".tr_s("2-23","xy").should == "1xy456789" + 
"hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" end it "pads to_str with its last char if it is shorter than from_string" do diff --git a/spec/ruby/core/string/tr_spec.rb b/spec/ruby/core/string/tr_spec.rb index 75841a974fcc53..8478ccc9d2879c 100644 --- a/spec/ruby/core/string/tr_spec.rb +++ b/spec/ruby/core/string/tr_spec.rb @@ -17,13 +17,11 @@ "hello ^-^".tr("---", "_").should == "hello ^_^" end - ruby_bug "#19769", ""..."3.3" do - it "accepts c1-c1 notation to denote range of one character" do - "hello".tr('e-e', 'x').should == "hxllo" - "123456789".tr("2-23","xy").should == "1xy456789" - "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" - "hello ^-^".tr("---o", "_a").should == "hella ^_^" - end + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr('e-e', 'x').should == "hxllo" + "123456789".tr("2-23","xy").should == "1xy456789" + "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr("---o", "_a").should == "hella ^_^" end it "pads to_str with its last char if it is shorter than from_string" do diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb index b088f901fc026c..70ea1cb6ad98e3 100644 --- a/spec/ruby/core/string/unpack/b_spec.rb +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -86,20 +86,10 @@ ].should be_computed_by(:unpack, "BBB") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x80\x00".unpack("B\x00B").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x80\x00".unpack("B\x00B") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x80\x00".unpack("B\x00B") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ 
-194,20 +184,10 @@ ].should be_computed_by(:unpack, "bbb") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x00".unpack("b\x00b").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x00".unpack("b\x00b") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x00".unpack("b\x00b") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb index 1e9548fb82411a..e42b027c7b8d6c 100644 --- a/spec/ruby/core/string/unpack/c_spec.rb +++ b/spec/ruby/core/string/unpack/c_spec.rb @@ -35,20 +35,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abc".unpack(unpack_format("\000", 2)).should == [97, 98] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb index 535836087d0f56..130b36401a7d05 100644 --- a/spec/ruby/core/string/unpack/h_spec.rb +++ b/spec/ruby/core/string/unpack/h_spec.rb @@ -56,20 +56,10 @@ ].should be_computed_by(:unpack, "HHH") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x10".unpack("H\x00H").should 
== ["0", "1"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x10".unpack("H\x00H") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("H\x00H") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -133,20 +123,10 @@ ].should be_computed_by(:unpack, "hhh") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x10".unpack("h\x00h").should == ["1", "0"] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x10".unpack("h\x00h") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("h\x00h") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb index 734630bda0a620..132c4ef08acf21 100644 --- a/spec/ruby/core/string/unpack/shared/basic.rb +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -9,20 +9,10 @@ "abc".unpack(d).should be_an_instance_of(Array) end - ruby_version_is ""..."3.3" do - it "warns about using an unknown directive" do - -> { "abcdefgh".unpack("a R" + unpack_format) }.should complain(/unknown unpack directive 'R' in 'a R#{unpack_format}'/) - -> { "abcdefgh".unpack("a 0" + unpack_format) }.should complain(/unknown unpack directive '0' in 'a 0#{unpack_format}'/) - -> { "abcdefgh".unpack("a :" + unpack_format) }.should complain(/unknown unpack directive ':' in 'a :#{unpack_format}'/) - end - end - - ruby_version_is "3.3" do - it "raises ArgumentError when a directive is unknown" do - -> { "abcdefgh".unpack("a 
K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") - -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") - -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") - end + it "raises ArgumentError when a directive is unknown" do + -> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") + -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") end end diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb index b31c2c8bdc406a..0133be2ecb498e 100644 --- a/spec/ruby/core/string/unpack/shared/float.rb +++ b/spec/ruby/core/string/unpack/shared/float.rb @@ -56,21 +56,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -135,21 +124,10 @@ 
[nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -217,20 +195,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -297,20 +265,10 @@ [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - 
"@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb index d3934753ba3ef8..eb994562251732 100644 --- a/spec/ruby/core/string/unpack/shared/integer.rb +++ b/spec/ruby/core/string/unpack/shared/integer.rb @@ -32,20 +32,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abcd".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcd".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -97,20 +87,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "badc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it 
"ignores spaces between directives" do @@ -163,20 +143,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "abcdefgh".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcdefgh".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -229,20 +199,10 @@ ].should be_computed_by(:unpack, unpack_format(3)) end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "dcbahgfe".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "dcbahgfe".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -291,21 +251,10 @@ "abc".unpack(unpack_format('*')).should == [] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "badc".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack 
directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do @@ -365,21 +314,10 @@ "abc".unpack(unpack_format('*')).should == [] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb index 9fe07f53aec1b3..b056aaed0be627 100644 --- a/spec/ruby/core/string/unpack/shared/unicode.rb +++ b/spec/ruby/core/string/unpack/shared/unicode.rb @@ -50,20 +50,10 @@ "\xc2\x80".unpack("UUUU").should == [0x80] end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x02".unpack("U\x00U").should == [1, 2] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x02".unpack("U\x00U") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02".unpack("U\x00U") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/w_spec.rb 
b/spec/ruby/core/string/unpack/w_spec.rb index 7d3533ccae109e..d2ad657b09c8b8 100644 --- a/spec/ruby/core/string/unpack/w_spec.rb +++ b/spec/ruby/core/string/unpack/w_spec.rb @@ -15,20 +15,10 @@ ].should be_computed_by(:unpack, "w") end - ruby_version_is ""..."3.3" do - it "ignores NULL bytes between directives" do - suppress_warning do - "\x01\x02\x03".unpack("w\x00w").should == [1, 2] - end - end - end - - ruby_version_is "3.3" do - it "raise ArgumentError for NULL bytes between directives" do - -> { - "\x01\x02\x03".unpack("w\x00w") - }.should raise_error(ArgumentError, /unknown unpack directive/) - end + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02\x03".unpack("w\x00w") + }.should raise_error(ArgumentError, /unknown unpack directive/) end it "ignores spaces between directives" do diff --git a/spec/ruby/core/struct/new_spec.rb b/spec/ruby/core/struct/new_spec.rb index 1d35de7b871230..741d6889af08a1 100644 --- a/spec/ruby/core/struct/new_spec.rb +++ b/spec/ruby/core/struct/new_spec.rb @@ -77,18 +77,10 @@ def obj.to_str() "Foo" end -> { Struct.new(:animal, { name: 'chris' }) }.should raise_error(TypeError) end - ruby_version_is ""..."3.3" do - it "raises ArgumentError if not provided any arguments" do - -> { Struct.new }.should raise_error(ArgumentError) - end - end - - ruby_version_is "3.3" do - it "works when not provided any arguments" do - c = Struct.new - c.should be_kind_of(Class) - c.superclass.should == Struct - end + it "works when not provided any arguments" do + c = Struct.new + c.should be_kind_of(Class) + c.superclass.should == Struct end it "raises ArgumentError when there is a duplicate member" do diff --git a/spec/ruby/core/symbol/inspect_spec.rb b/spec/ruby/core/symbol/inspect_spec.rb index df4566c48e6449..f2269996af0f92 100644 --- a/spec/ruby/core/symbol/inspect_spec.rb +++ b/spec/ruby/core/symbol/inspect_spec.rb @@ -109,4 +109,23 @@ input.inspect.should == expected end end + + it "quotes BINARY symbols" do + 
sym = "foo\xA4".b.to_sym + sym.inspect.should == ':"foo\xA4"' + end + + it "quotes symbols in non-ASCII-compatible encodings" do + Encoding.list.reject(&:ascii_compatible?).reject(&:dummy?).each do |encoding| + sym = "foo".encode(encoding).to_sym + sym.inspect.should == ':"foo"' + end + end + + it "quotes and escapes symbols in dummy encodings" do + Encoding.list.select(&:dummy?).each do |encoding| + sym = "abcd".dup.force_encoding(encoding).to_sym + sym.inspect.should == ':"\x61\x62\x63\x64"' + end + end end diff --git a/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb b/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb index e903c3e450fe97..103c36b3a0ab04 100644 --- a/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb +++ b/spec/ruby/core/thread/backtrace/location/fixtures/classes.rb @@ -1,10 +1,26 @@ +# These are top-level def on purpose to test those cases + +def label_top_method = ThreadBacktraceLocationSpecs::LABEL.call + +def self.label_sdef_method_of_main = ThreadBacktraceLocationSpecs::LABEL.call + +class << self + def label_sclass_method_of_main = ThreadBacktraceLocationSpecs::LABEL.call +end + module ThreadBacktraceLocationSpecs MODULE_LOCATION = caller_locations(0) rescue nil + INSTANCE = Object.new.extend(self) + LABEL = -> { caller_locations(1, 1)[0].label } def self.locations caller_locations end + def instance_method_location + caller_locations(0) + end + def self.method_location caller_locations(0) end @@ -15,6 +31,12 @@ def self.block_location end end + def instance_block_location + 1.times do + return caller_locations(0) + end + end + def self.locations_inside_nested_blocks first_level_location = nil second_level_location = nil @@ -32,4 +54,86 @@ def self.locations_inside_nested_blocks [first_level_location, second_level_location, third_level_location] end + + def instance_locations_inside_nested_block + loc = nil + 1.times do + 1.times do + loc = caller_locations(0) + end + end + loc + end + + def original_method 
= LABEL.call + alias_method :aliased_method, :original_method + + module M + class C + def regular_instance_method = LABEL.call + + def self.sdef_class_method = LABEL.call + + class << self + def sclass_method = LABEL.call + + def block_in_sclass_method + -> { + -> { LABEL.call }.call + }.call + end + end + block_in_sclass_method + end + end + + class M::D + def scoped_method = LABEL.call + + def self.sdef_scoped_method = LABEL.call + + class << self + def sclass_scoped_method = LABEL.call + end + + module ::ThreadBacktraceLocationSpecs + def top = LABEL.call + end + + class ::ThreadBacktraceLocationSpecs::Nested + def top_nested = LABEL.call + + class C + def top_nested_c = LABEL.call + end + end + end + + SOME_OBJECT = Object.new + SOME_OBJECT.instance_exec do + def unknown_def_singleton_method = LABEL.call + + def self.unknown_sdef_singleton_method = LABEL.call + end + + M.module_eval do + def module_eval_method = LABEL.call + + def self.sdef_module_eval_method = LABEL.call + end + + def ThreadBacktraceLocationSpecs.string_class_method = LABEL.call + + module M + def ThreadBacktraceLocationSpecs.nested_class_method = LABEL.call + end + + module M + module_function def mod_function = LABEL.call + end + + expr = self + def expr.sdef_expression = LABEL.call + + def expr.block_in_sdef_expression = -> { LABEL.call }.call end diff --git a/spec/ruby/core/thread/backtrace/location/label_spec.rb b/spec/ruby/core/thread/backtrace/location/label_spec.rb index 85ddccc8e3f831..7d358b45ea8fe3 100644 --- a/spec/ruby/core/thread/backtrace/location/label_spec.rb +++ b/spec/ruby/core/thread/backtrace/location/label_spec.rb @@ -15,7 +15,7 @@ end it 'returns the module name for a module location' do - ThreadBacktraceLocationSpecs::MODULE_LOCATION[0].label.should include "ThreadBacktraceLocationSpecs" + ThreadBacktraceLocationSpecs::MODULE_LOCATION[0].label.should == "" end it 'includes the nesting level of a block as part of the location label' do @@ -34,4 +34,194 @@ 
main_label.should == "block in
\n" required_label.should == "block in \n" end + + it "return the same name as the caller for eval" do + this = caller_locations(0)[0].label + eval("caller_locations(0)[0]").label.should == this + + b = binding + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var1, 1) + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var2, 2) + b.eval("caller_locations(0)[0]").label.should == this + + b.local_variable_set(:binding_var2, 2) + eval("caller_locations(0)[0]", b).label.should == this + end + + ruby_version_is "3.4" do + describe "is Module#method for" do + it "a core method defined natively" do + BasicObject.instance_method(:instance_exec).should_not.source_location + loc = nil + loc = instance_exec { caller_locations(1, 1)[0] } + loc.label.should == "BasicObject#instance_exec" + end + + it "a core method defined in Ruby" do + Kernel.instance_method(:tap).should.source_location + loc = nil + tap { loc = caller_locations(1, 1)[0] } + loc.label.should == "Kernel#tap" + end + + it "an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_method_location[0].label.should == "ThreadBacktraceLocationSpecs#instance_method_location" + end + + it "a block in an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_block_location[0].label.should == "block in ThreadBacktraceLocationSpecs#instance_block_location" + end + + it "a nested block in an instance method defined in Ruby" do + ThreadBacktraceLocationSpecs::INSTANCE.instance_locations_inside_nested_block[0].label.should == "block (2 levels) in ThreadBacktraceLocationSpecs#instance_locations_inside_nested_block" + end + + it "a method defined via module_exec" do + ThreadBacktraceLocationSpecs.module_exec do + def in_module_exec + caller_locations(0) + end + end + ThreadBacktraceLocationSpecs::INSTANCE.in_module_exec[0].label.should == "ThreadBacktraceLocationSpecs#in_module_exec" + 
end + + it "a method defined via module_eval" do + ThreadBacktraceLocationSpecs.module_eval <<~RUBY + def in_module_eval + caller_locations(0) + end + RUBY + ThreadBacktraceLocationSpecs::INSTANCE.in_module_eval[0].label.should == "ThreadBacktraceLocationSpecs#in_module_eval" + end + end + + describe "is Module.method for" do + it "a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.method_location[0].label.should == "ThreadBacktraceLocationSpecs.method_location" + end + + it "a block in a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.block_location[0].label.should == "block in ThreadBacktraceLocationSpecs.block_location" + end + + it "a nested block in a singleton method defined in Ruby" do + ThreadBacktraceLocationSpecs.locations_inside_nested_blocks[2].label.should == "block (3 levels) in ThreadBacktraceLocationSpecs.locations_inside_nested_blocks" + end + + it "a singleton method defined via def Const.method" do + def ThreadBacktraceLocationSpecs.def_singleton + caller_locations(0) + end + ThreadBacktraceLocationSpecs.def_singleton[0].label.should == "ThreadBacktraceLocationSpecs.def_singleton" + end + end + + it "shows the original method name for an aliased method" do + ThreadBacktraceLocationSpecs::INSTANCE.aliased_method.should == "ThreadBacktraceLocationSpecs#original_method" + end + + # A wide variety of cases. 
+ # These show interesting cases when trying to determine the name statically/at parse time + describe "is correct for" do + base = ThreadBacktraceLocationSpecs + + it "M::C#regular_instance_method" do + base::M::C.new.regular_instance_method.should == "#{base}::M::C#regular_instance_method" + end + + it "M::C.sdef_class_method" do + base::M::C.sdef_class_method.should == "#{base}::M::C.sdef_class_method" + end + + it "M::C.sclass_method" do + base::M::C.sclass_method.should == "#{base}::M::C.sclass_method" + end + + it "M::C.block_in_sclass_method" do + base::M::C.block_in_sclass_method.should == "block (2 levels) in #{base}::M::C.block_in_sclass_method" + end + + it "M::D#scoped_method" do + base::M::D.new.scoped_method.should == "#{base}::M::D#scoped_method" + end + + it "M::D.sdef_scoped_method" do + base::M::D.sdef_scoped_method.should == "#{base}::M::D.sdef_scoped_method" + end + + it "M::D.sclass_scoped_method" do + base::M::D.sclass_scoped_method.should == "#{base}::M::D.sclass_scoped_method" + end + + it "ThreadBacktraceLocationSpecs#top" do + ThreadBacktraceLocationSpecs::INSTANCE.top.should == "ThreadBacktraceLocationSpecs#top" + end + + it "ThreadBacktraceLocationSpecs::Nested#top_nested" do + ThreadBacktraceLocationSpecs::Nested.new.top_nested.should == "ThreadBacktraceLocationSpecs::Nested#top_nested" + end + + it "ThreadBacktraceLocationSpecs::Nested::C#top_nested_c" do + ThreadBacktraceLocationSpecs::Nested::C.new.top_nested_c.should == "ThreadBacktraceLocationSpecs::Nested::C#top_nested_c" + end + + it "Object#label_top_method" do + label_top_method.should == "Object#label_top_method" + end + + it "main.label_sdef_method_of_main" do + main = TOPLEVEL_BINDING.receiver + main.label_sdef_method_of_main.should == "label_sdef_method_of_main" + end + + it "main.label_sclass_method_of_main" do + main = TOPLEVEL_BINDING.receiver + main.label_sclass_method_of_main.should == "label_sclass_method_of_main" + end + + it "unknown_def_singleton_method" do + 
base::SOME_OBJECT.unknown_def_singleton_method.should == "unknown_def_singleton_method" + end + + it "unknown_sdef_singleton_method" do + base::SOME_OBJECT.unknown_sdef_singleton_method.should == "unknown_sdef_singleton_method" + end + + it "M#module_eval_method" do + Object.new.extend(base::M).module_eval_method.should == "#{base}::M#module_eval_method" + end + + it "M.sdef_module_eval_method" do + base::M.sdef_module_eval_method.should == "#{base}::M.sdef_module_eval_method" + end + + it "ThreadBacktraceLocationSpecs.string_class_method" do + ThreadBacktraceLocationSpecs.string_class_method.should == "ThreadBacktraceLocationSpecs.string_class_method" + end + + it "ThreadBacktraceLocationSpecs.nested_class_method" do + ThreadBacktraceLocationSpecs.nested_class_method.should == "ThreadBacktraceLocationSpecs.nested_class_method" + end + + it "M#mod_function" do + Object.new.extend(base::M).send(:mod_function).should == "#{base}::M#mod_function" + end + + it "M.mod_function" do + base::M.mod_function.should == "#{base}::M.mod_function" + end + + it "sdef_expression" do + base.sdef_expression.should == "#{base}.sdef_expression" + end + + it "block_in_sdef_expression" do + base.block_in_sdef_expression.should == "block in #{base}.block_in_sdef_expression" + end + end + end end diff --git a/spec/ruby/core/thread/native_thread_id_spec.rb b/spec/ruby/core/thread/native_thread_id_spec.rb index 374cc592797a20..65d1b5b318dbac 100644 --- a/spec/ruby/core/thread/native_thread_id_spec.rb +++ b/spec/ruby/core/thread/native_thread_id_spec.rb @@ -18,12 +18,8 @@ main_thread_id = Thread.current.native_thread_id t_thread_id = t.native_thread_id - if ruby_version_is "3.3" - # native_thread_id can be nil on a M:N scheduler - t_thread_id.should be_kind_of(Integer) if t_thread_id != nil - else - t_thread_id.should be_kind_of(Integer) - end + # native_thread_id can be nil on a M:N scheduler + t_thread_id.should be_kind_of(Integer) if t_thread_id != nil main_thread_id.should_not == 
t_thread_id diff --git a/spec/ruby/core/time/new_spec.rb b/spec/ruby/core/time/new_spec.rb index dc3ccbdc0052df..f3b5d0142044b4 100644 --- a/spec/ruby/core/time/new_spec.rb +++ b/spec/ruby/core/time/new_spec.rb @@ -554,20 +554,10 @@ def obj.to_int; 3; end Time.new("2020-12-25T00:56:17.123456789876 +09:00").subsec.should == 0.123456789 end - ruby_version_is ""..."3.3" do - it "raise TypeError is can't convert precision keyword argument into Integer" do - -> { - Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") - }.should raise_error(TypeError, "no implicit conversion from string") - end - end - - ruby_version_is "3.3" do - it "raise TypeError is can't convert precision keyword argument into Integer" do - -> { - Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") - }.should raise_error(TypeError, "no implicit conversion of String into Integer") - end + it "raise TypeError is can't convert precision keyword argument into Integer" do + -> { + Time.new("2021-12-25 00:00:00.123456789876 +09:00", precision: "") + }.should raise_error(TypeError, "no implicit conversion of String into Integer") end it "raises ArgumentError if part of time string is missing" do diff --git a/spec/ruby/core/tracepoint/path_spec.rb b/spec/ruby/core/tracepoint/path_spec.rb index dc2ca840b80ca5..aa6868ead2ffd8 100644 --- a/spec/ruby/core/tracepoint/path_spec.rb +++ b/spec/ruby/core/tracepoint/path_spec.rb @@ -13,29 +13,14 @@ path.should == "#{__FILE__}" end - ruby_version_is ""..."3.3" do - it 'equals (eval) inside an eval for :end event' do - path = nil - TracePoint.new(:end) { |tp| - next unless TracePointSpec.target_thread? - path = tp.path - }.enable do - eval("module TracePointSpec; end") - end - path.should == '(eval)' - end - end - - ruby_version_is "3.3" do - it 'equals "(eval at __FILE__:__LINE__)" inside an eval for :end event' do - path = nil - TracePoint.new(:end) { |tp| - next unless TracePointSpec.target_thread? 
- path = tp.path - }.enable do - eval("module TracePointSpec; end") - end - path.should == "(eval at #{__FILE__}:#{__LINE__ - 2})" + it 'equals "(eval at __FILE__:__LINE__)" inside an eval for :end event' do + path = nil + TracePoint.new(:end) { |tp| + next unless TracePointSpec.target_thread? + path = tp.path + }.enable do + eval("module TracePointSpec; end") end + path.should == "(eval at #{__FILE__}:#{__LINE__ - 2})" end end diff --git a/spec/ruby/core/tracepoint/raised_exception_spec.rb b/spec/ruby/core/tracepoint/raised_exception_spec.rb index 5ac85318404964..e74afa9abc96c1 100644 --- a/spec/ruby/core/tracepoint/raised_exception_spec.rb +++ b/spec/ruby/core/tracepoint/raised_exception_spec.rb @@ -18,21 +18,19 @@ end end - ruby_version_is "3.3" do - it 'returns value from exception rescued on the :rescue event' do - raised_exception, error_result = nil - trace = TracePoint.new(:rescue) { |tp| - next unless TracePointSpec.target_thread? - raised_exception = tp.raised_exception - } - trace.enable do - begin - raise StandardError - rescue => e - error_result = e - end - raised_exception.should equal(error_result) + it 'returns value from exception rescued on the :rescue event' do + raised_exception, error_result = nil + trace = TracePoint.new(:rescue) { |tp| + next unless TracePointSpec.target_thread? 
+ raised_exception = tp.raised_exception + } + trace.enable do + begin + raise StandardError + rescue => e + error_result = e end + raised_exception.should equal(error_result) end end end diff --git a/spec/ruby/core/true/singleton_method_spec.rb b/spec/ruby/core/true/singleton_method_spec.rb index c06793850fa87a..575c504b728da3 100644 --- a/spec/ruby/core/true/singleton_method_spec.rb +++ b/spec/ruby/core/true/singleton_method_spec.rb @@ -1,15 +1,13 @@ require_relative '../../spec_helper' describe "TrueClass#singleton_method" do - ruby_version_is '3.3' do - it "raises regardless of whether TrueClass defines the method" do + it "raises regardless of whether TrueClass defines the method" do + -> { true.singleton_method(:foo) }.should raise_error(NameError) + begin + def (true).foo; end -> { true.singleton_method(:foo) }.should raise_error(NameError) - begin - def (true).foo; end - -> { true.singleton_method(:foo) }.should raise_error(NameError) - ensure - TrueClass.send(:remove_method, :foo) - end + ensure + TrueClass.send(:remove_method, :foo) end end end diff --git a/spec/ruby/core/unboundmethod/equal_value_spec.rb b/spec/ruby/core/unboundmethod/equal_value_spec.rb index b2d78c50afb359..c9f7ad45dacc82 100644 --- a/spec/ruby/core/unboundmethod/equal_value_spec.rb +++ b/spec/ruby/core/unboundmethod/equal_value_spec.rb @@ -110,9 +110,6 @@ class << self c.method(:n).should == Class.instance_method(:new).bind(c) end - # On CRuby < 3.2, the 2 specs below pass due to method/instance_method skipping zsuper methods. - # We are interested in the general pattern working, i.e. the combination of method/instance_method - # and #== exposes the wanted behavior. 
it "considers methods through visibility change equal" do c = Class.new do class << self diff --git a/spec/ruby/core/warning/element_reference_spec.rb b/spec/ruby/core/warning/element_reference_spec.rb index c0ed37ef139d05..6179c578646255 100644 --- a/spec/ruby/core/warning/element_reference_spec.rb +++ b/spec/ruby/core/warning/element_reference_spec.rb @@ -10,11 +10,9 @@ ruby_exe('p [Warning[:deprecated], Warning[:experimental]]', options: "-w").chomp.should == "[true, true]" end - ruby_version_is '3.3' do - it "returns default values for :performance category" do - ruby_exe('p Warning[:performance]').chomp.should == "false" - ruby_exe('p Warning[:performance]', options: "-w").chomp.should == "false" - end + it "returns default values for :performance category" do + ruby_exe('p Warning[:performance]').chomp.should == "false" + ruby_exe('p Warning[:performance]', options: "-w").chomp.should == "false" end it "raises for unknown category" do diff --git a/spec/ruby/core/warning/element_set_spec.rb b/spec/ruby/core/warning/element_set_spec.rb index d59a7d4c9e13c8..1dbc66ce26cae9 100644 --- a/spec/ruby/core/warning/element_set_spec.rb +++ b/spec/ruby/core/warning/element_set_spec.rb @@ -17,15 +17,13 @@ end end - ruby_version_is '3.3' do - it "enables or disables performance warnings" do - original = Warning[:performance] - begin - Warning[:performance] = !original - Warning[:performance].should == !original - ensure - Warning[:performance] = original - end + it "enables or disables performance warnings" do + original = Warning[:performance] + begin + Warning[:performance] = !original + Warning[:performance].should == !original + ensure + Warning[:performance] = original end end diff --git a/spec/ruby/language/assignments_spec.rb b/spec/ruby/language/assignments_spec.rb index c4adf73c1cbf67..58a244b7c27d87 100644 --- a/spec/ruby/language/assignments_spec.rb +++ b/spec/ruby/language/assignments_spec.rb @@ -219,15 +219,7 @@ def []=(*args, **kw) end end - ruby_version_is 
""..."3.3" do - it "supports keyword arguments in index assignments" do - a = @klass.new - eval "a[1, 2, 3, b: 4] += 5" - a.x.should == [[1, 2, 3, {b: 4}, 105], {}] - end - end - - ruby_version_is "3.3"..."3.4" do + ruby_version_is ""..."3.4" do it "supports keyword arguments in index assignments" do a = @klass.new eval "a[1, 2, 3, b: 4] += 5" diff --git a/spec/ruby/language/block_spec.rb b/spec/ruby/language/block_spec.rb index cc003b8946270e..67aad76c57e922 100644 --- a/spec/ruby/language/block_spec.rb +++ b/spec/ruby/language/block_spec.rb @@ -192,6 +192,22 @@ def m(a) yield a end m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] end + it "calls #respond_to? on a BasicObject to check if object has method #to_ary" do + ScratchPad.record [] + obj = BasicObject.new + def obj.respond_to?(name, *) + ScratchPad << [:respond_to?, name] + name == :to_ary ? true : super + end + def obj.to_ary + ScratchPad << :to_ary + [1, 2] + end + + m(obj) { |a, b, c| [a, b, c] }.should == [1, 2, nil] + ScratchPad.recorded.should == [[:respond_to?, :to_ary], :to_ary] + end + it "receives the object if it does not respond to #respond_to?" 
do obj = BasicObject.new @@ -1041,8 +1057,8 @@ def all_kwrest(arg1, arg2, *rest, post1, post2, kw1: 1, kw2: 2, okw1:, okw2:, ** end end -describe "`it` calls without arguments in a block with no ordinary parameters" do - ruby_version_is "3.3"..."3.4" do +describe "`it` calls without arguments in a block" do + ruby_version_is ""..."3.4" do it "emits a deprecation warning" do -> { eval "proc { it }" @@ -1094,38 +1110,11 @@ def o.it end end end - - ruby_version_is "3.4" do - it "does not emit a deprecation warning" do - -> { - eval "proc { it }" - }.should_not complain - end - - it "acts as the first argument if no local variables exist" do - eval("proc { it * 2 }").call(5).should == 10 - end - - it "can be reassigned to act as a local variable" do - eval("proc { tmp = it; it = tmp * 2; it }").call(21).should == 42 - end - - it "can be used in nested calls" do - eval("proc { it.map { it * 2 } }").call([1, 2, 3]).should == [2, 4, 6] - end - - it "cannot be mixed with numbered parameters" do - -> { - eval "proc { it + _1 }" - }.should raise_error(SyntaxError, /numbered parameters are not allowed when 'it' is already used|'it' is already used in/) - - -> { - eval "proc { _1 + it }" - }.should raise_error(SyntaxError, /numbered parameter is already used in|'it' is not allowed when a numbered parameter is already used/) - end - end end +# Duplicates specs in language/it_parameter_spec.rb +# Need them here to run on Ruby versions prior 3.4 +# TODO: remove when the minimal supported Ruby version is 3.4 describe "if `it` is defined as a variable" do it "treats `it` as a captured variable if defined outside of a block" do it = 5 diff --git a/spec/ruby/language/delegation_spec.rb b/spec/ruby/language/delegation_spec.rb index c711a536c22d71..cd44956f5d1c65 100644 --- a/spec/ruby/language/delegation_spec.rb +++ b/spec/ruby/language/delegation_spec.rb @@ -37,6 +37,16 @@ def delegate(...) 
a.new.delegate(1, b: 2, &block).should == [[1], {b: 2}, block] end + it "delegates with additional arguments" do + a = Class.new(DelegationSpecs::Target) + a.class_eval(<<-RUBY) + def delegate(...) + target(:first, :second, ...) + end + RUBY + a.new.delegate(1, b: 2).should == [[:first, :second, 1], {b: 2}, nil] + end + it "parses as open endless Range when brackets are omitted" do a = Class.new(DelegationSpecs::Target) suppress_warning do @@ -99,13 +109,11 @@ def delegate(*) a.new.delegate(0, 1).should == [[0, 1], {}, nil] end - ruby_version_is "3.3" do - context "within a block that accepts anonymous rest within a method that accepts anonymous rest" do - it "does not allow delegating rest" do - -> { - eval "def m(*); proc { |*| n(*) } end" - }.should raise_error(SyntaxError, /anonymous rest parameter is also used within block/) - end + context "within a block that accepts anonymous rest within a method that accepts anonymous rest" do + it "does not allow delegating rest" do + -> { + eval "def m(*); proc { |*| n(*) } end" + }.should raise_error(SyntaxError, /anonymous rest parameter is also used within block/) end end end @@ -122,13 +130,11 @@ def delegate(**) a.new.delegate(a: 1) { |x| x }.should == [[], {a: 1}, nil] end - ruby_version_is "3.3" do - context "within a block that accepts anonymous kwargs within a method that accepts anonymous kwargs" do - it "does not allow delegating kwargs" do - -> { - eval "def m(**); proc { |**| n(**) } end" - }.should raise_error(SyntaxError, /anonymous keyword rest parameter is also used within block/) - end + context "within a block that accepts anonymous kwargs within a method that accepts anonymous kwargs" do + it "does not allow delegating kwargs" do + -> { + eval "def m(**); proc { |**| n(**) } end" + }.should raise_error(SyntaxError, /anonymous keyword rest parameter is also used within block/) end end end @@ -146,13 +152,11 @@ def delegate(&) a.new.delegate(&block).should == [[], {}, block] end - ruby_version_is "3.3" 
do - context "within a block that accepts anonymous block within a method that accepts anonymous block" do - it "does not allow delegating a block" do - -> { - eval "def m(&); proc { |&| n(&) } end" - }.should raise_error(SyntaxError, /anonymous block parameter is also used within block/) - end + context "within a block that accepts anonymous block within a method that accepts anonymous block" do + it "does not allow delegating a block" do + -> { + eval "def m(&); proc { |&| n(&) } end" + }.should raise_error(SyntaxError, /anonymous block parameter is also used within block/) end end end diff --git a/spec/ruby/language/file_spec.rb b/spec/ruby/language/file_spec.rb index 59563d9642e00e..36fd329bf6a7ca 100644 --- a/spec/ruby/language/file_spec.rb +++ b/spec/ruby/language/file_spec.rb @@ -7,16 +7,8 @@ -> { eval("__FILE__ = 1") }.should raise_error(SyntaxError) end - ruby_version_is ""..."3.3" do - it "equals (eval) inside an eval" do - eval("__FILE__").should == "(eval)" - end - end - - ruby_version_is "3.3" do - it "equals (eval at __FILE__:__LINE__) inside an eval" do - eval("__FILE__").should == "(eval at #{__FILE__}:#{__LINE__})" - end + it "equals (eval at __FILE__:__LINE__) inside an eval" do + eval("__FILE__").should == "(eval at #{__FILE__}:#{__LINE__})" end end diff --git a/spec/ruby/language/for_spec.rb b/spec/ruby/language/for_spec.rb index b8ddfe5f0ddfb0..7fc6751d070eb1 100644 --- a/spec/ruby/language/for_spec.rb +++ b/spec/ruby/language/for_spec.rb @@ -129,37 +129,34 @@ class OFor n.should == 3 end - # Segfault in MRI 3.3 and lower: https://bugs.ruby-lang.org/issues/20468 - ruby_bug "#20468", ""..."3.4" do - it "allows an attribute with safe navigation as an iterator name" do - class OFor - attr_accessor :target - end - - ofor = OFor.new - m = [1,2,3] - n = 0 - eval <<~RUBY - for ofor&.target in m - n += 1 - end - RUBY - ofor.target.should == 3 - n.should == 3 + it "allows an attribute with safe navigation as an iterator name" do + class OFor + 
attr_accessor :target end - it "allows an attribute with safe navigation on a nil base as an iterator name" do - ofor = nil - m = [1,2,3] - n = 0 - eval <<~RUBY - for ofor&.target in m - n += 1 - end - RUBY - ofor.should be_nil - n.should == 3 - end + ofor = OFor.new + m = [1,2,3] + n = 0 + eval <<~RUBY + for ofor&.target in m + n += 1 + end + RUBY + ofor.target.should == 3 + n.should == 3 + end + + it "allows an attribute with safe navigation on a nil base as an iterator name" do + ofor = nil + m = [1,2,3] + n = 0 + eval <<~RUBY + for ofor&.target in m + n += 1 + end + RUBY + ofor.should be_nil + n.should == 3 end it "allows an array index writer as an iterator name" do diff --git a/spec/ruby/language/hash_spec.rb b/spec/ruby/language/hash_spec.rb index 668716e2e325da..c7e1bf2d88bffd 100644 --- a/spec/ruby/language/hash_spec.rb +++ b/spec/ruby/language/hash_spec.rb @@ -167,6 +167,17 @@ def h.to_hash; {:b => 2, :c => 3}; end {**nil}.should == {} {a: 1, **nil}.should == {a: 1} end + + it "expands nil using ** into {} and provides a copy to the callable" do + ScratchPad.record [] + insert = -> key, **kw do + kw[key] = 1 + ScratchPad << kw + end + insert.call(:foo, **nil) + insert.call(:bar, **nil) + ScratchPad.recorded.should == [{ foo: 1 }, { bar: 1 }] + end end it "expands an '**{}' or '**obj' element with the last key/value pair taking precedence" do @@ -264,17 +275,15 @@ def m(**h) h.should == { one: 1, two: 2 } end - ruby_bug "#20012", ""..."3.3" do - it "makes a copy when calling a method taking a positional Hash" do - def m(h) - h.delete(:one); h - end - - h = { one: 1, two: 2 } - m(**h).should == { two: 2 } - m(**h).should_not.equal?(h) - h.should == { one: 1, two: 2 } + it "makes a copy when calling a method taking a positional Hash" do + def m(h) + h.delete(:one); h end + + h = { one: 1, two: 2 } + m(**h).should == { two: 2 } + m(**h).should_not.equal?(h) + h.should == { one: 1, two: 2 } end describe "hash with omitted value" do diff --git 
a/spec/ruby/language/it_parameter_spec.rb b/spec/ruby/language/it_parameter_spec.rb index 72023180d91d54..58ec3a6faf0f1a 100644 --- a/spec/ruby/language/it_parameter_spec.rb +++ b/spec/ruby/language/it_parameter_spec.rb @@ -1,6 +1,7 @@ require_relative '../spec_helper' ruby_version_is "3.4" do + eval <<-RUBY # use eval to avoid warnings on Ruby 3.3 describe "The `it` parameter" do it "provides it in a block" do -> { it }.call("a").should == "a" @@ -17,9 +18,28 @@ -> { it + -> { it * it }.call(2) }.call(3).should == 7 end + it "can be reassigned to act as a local variable" do + proc { tmp = it; it = tmp * 2; it }.call(21).should == 42 + end + it "is a regular local variable if there is already a 'it' local variable" do - it = 0 - proc { it }.call("a").should == 0 + it = 0 + proc { it }.call("a").should == 0 + end + + it "is a regular local variable if there is a method `it` defined" do + o = Object.new + def o.it + 21 + end + + o.instance_eval("proc { it * 2 }").call(1).should == 2 + end + + it "is not shadowed by an reassignment in a block" do + a = nil + proc { a = it; it = 42 }.call(0) + a.should == 0 # if `it` were shadowed its value would be nil end it "raises SyntaxError when block parameters are specified explicitly" do @@ -36,6 +56,16 @@ -> { eval("['a'].map { |x| it }") }.should raise_error(SyntaxError, /ordinary parameter is defined/) end + it "cannot be mixed with numbered parameters" do + -> { + eval("proc { it + _1 }") + }.should raise_error(SyntaxError, /numbered parameters are not allowed when 'it' is already used|'it' is already used in/) + + -> { + eval("proc { _1 + it }") + }.should raise_error(SyntaxError, /numbered parameter is already used in|'it' is not allowed when a numbered parameter is already used/) + end + it "affects block arity" do -> {}.arity.should == 0 -> { it }.arity.should == 1 @@ -62,5 +92,17 @@ def obj.foo; it; end -> { obj.foo("a") }.should raise_error(ArgumentError, /wrong number of arguments/) end + + context "given multiple 
arguments" do + it "provides it in a block and assigns the first argument for a block" do + proc { it }.call("a", "b").should == "a" + end + + it "raises ArgumentError for a proc" do + -> { -> { it }.call("a", "b") }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") + -> { lambda { it }.call("a", "b") }.should raise_error(ArgumentError, "wrong number of arguments (given 2, expected 1)") + end + end end + RUBY end diff --git a/spec/ruby/language/keyword_arguments_spec.rb b/spec/ruby/language/keyword_arguments_spec.rb index 4f6370d419e03b..c51c3bc656d4e9 100644 --- a/spec/ruby/language/keyword_arguments_spec.rb +++ b/spec/ruby/language/keyword_arguments_spec.rb @@ -87,16 +87,14 @@ def m(*a) end context "**" do - ruby_version_is "3.3" do - it "copies a non-empty Hash for a method taking (*args)" do - def m(*args) - args[0] - end - - h = {a: 1} - m(**h).should_not.equal?(h) - h.should == {a: 1} + it "copies a non-empty Hash for a method taking (*args)" do + def m(*args) + args[0] end + + h = {a: 1} + m(**h).should_not.equal?(h) + h.should == {a: 1} end it "copies the given Hash for a method taking (**kwargs)" do diff --git a/spec/ruby/language/method_spec.rb b/spec/ruby/language/method_spec.rb index 8f72bd45ed8cbe..8f9f094fd89a45 100644 --- a/spec/ruby/language/method_spec.rb +++ b/spec/ruby/language/method_spec.rb @@ -1234,10 +1234,8 @@ def n(value, &block) args.should == [true] end - ruby_version_is "3.3" do - it "supports multiple statements" do - eval("m (1; 2)").should == [2] - end + it "supports multiple statements" do + eval("m (1; 2)").should == [2] end end diff --git a/spec/ruby/library/English/English_spec.rb b/spec/ruby/library/English/English_spec.rb index 4d615d1e2506ef..166785f066640b 100644 --- a/spec/ruby/library/English/English_spec.rb +++ b/spec/ruby/library/English/English_spec.rb @@ -130,18 +130,6 @@ $LAST_MATCH_INFO.should == $~ end - ruby_version_is ""..."3.3" do - it "aliases $IGNORECASE to $=" do - $VERBOSE, 
verbose = nil, $VERBOSE - begin - $IGNORECASE.should_not be_nil - $IGNORECASE.should == $= - ensure - $VERBOSE = verbose - end - end - end - it "aliases $ARGV to $*" do $ARGV.should_not be_nil $ARGV.should == $* diff --git a/spec/ruby/library/bigdecimal/remainder_spec.rb b/spec/ruby/library/bigdecimal/remainder_spec.rb index 0eb06f7ef1d402..b31967e76bd53c 100644 --- a/spec/ruby/library/bigdecimal/remainder_spec.rb +++ b/spec/ruby/library/bigdecimal/remainder_spec.rb @@ -56,25 +56,6 @@ @nan.remainder(@infinity).should.nan? end - version_is BigDecimal::VERSION, ""..."3.1.4" do #ruby_version_is ""..."3.3" do - it "returns NaN if Infinity is involved" do - @infinity.remainder(@infinity).should.nan? - @infinity.remainder(@one).should.nan? - @infinity.remainder(@mixed).should.nan? - @infinity.remainder(@one_minus).should.nan? - @infinity.remainder(@frac_1).should.nan? - @one.remainder(@infinity).should.nan? - - @infinity_minus.remainder(@infinity_minus).should.nan? - @infinity_minus.remainder(@one).should.nan? - @one.remainder(@infinity_minus).should.nan? - @frac_2.remainder(@infinity_minus).should.nan? - - @infinity.remainder(@infinity_minus).should.nan? - @infinity_minus.remainder(@infinity).should.nan? 
- end - end - it "coerces arguments to BigDecimal if possible" do @three.remainder(2).should == @one end diff --git a/spec/ruby/library/bigdecimal/to_s_spec.rb b/spec/ruby/library/bigdecimal/to_s_spec.rb index ba9f960eb32450..025057b4d7e873 100644 --- a/spec/ruby/library/bigdecimal/to_s_spec.rb +++ b/spec/ruby/library/bigdecimal/to_s_spec.rb @@ -52,10 +52,8 @@ BigDecimal("1.2345").to_s('0F').should == "1.2345" end - version_is BigDecimal::VERSION, "3.1.5" do #ruby_version_is '3.3' do - it "inserts a space every n chars to integer part, if integer n is supplied" do - BigDecimal('1000010').to_s('5F').should == "10 00010.0" - end + it "inserts a space every n chars to integer part, if integer n is supplied" do + BigDecimal('1000010').to_s('5F').should == "10 00010.0" end it "can return a leading space for values > 0" do diff --git a/spec/ruby/library/random/formatter/alphanumeric_spec.rb b/spec/ruby/library/random/formatter/alphanumeric_spec.rb index 9bd325e1d0a6ab..ce45b96dc2b7a7 100644 --- a/spec/ruby/library/random/formatter/alphanumeric_spec.rb +++ b/spec/ruby/library/random/formatter/alphanumeric_spec.rb @@ -41,16 +41,14 @@ }.should raise_error(ArgumentError) end - ruby_version_is "3.3" do - it "accepts a 'chars' argument with the output alphabet" do - @object.alphanumeric(chars: ['a', 'b']).should =~ /\A[ab]+\z/ - end + it "accepts a 'chars' argument with the output alphabet" do + @object.alphanumeric(chars: ['a', 'b']).should =~ /\A[ab]+\z/ + end - it "converts the elements of chars using #to_s" do - to_s = mock("to_s") - to_s.should_receive(:to_s).and_return("[mock to_s]") - # Using 1 value in chars results in an infinite loop - @object.alphanumeric(1, chars: [to_s, to_s]).should == "[mock to_s]" - end + it "converts the elements of chars using #to_s" do + to_s = mock("to_s") + to_s.should_receive(:to_s).and_return("[mock to_s]") + # Using 1 value in chars results in an infinite loop + @object.alphanumeric(1, chars: [to_s, to_s]).should == "[mock to_s]" end 
end diff --git a/spec/ruby/library/ripper/lex_spec.rb b/spec/ruby/library/ripper/lex_spec.rb index 97cfb06904fad6..0255480579ee4f 100644 --- a/spec/ruby/library/ripper/lex_spec.rb +++ b/spec/ruby/library/ripper/lex_spec.rb @@ -10,14 +10,14 @@ [[1, 5], :on_lparen, "(", 'BEG|LABEL'], [[1, 6], :on_ident, "a", 'ARG'], [[1, 7], :on_rparen, ")", 'ENDFN'], - [[1, 8], :on_sp, " ", 'BEG'], + [[1, 8], :on_semicolon, ";", 'BEG'], [[1, 9], :on_kw, "nil", 'END'], [[1, 12], :on_sp, " ", 'END'], [[1, 13], :on_kw, "end", 'END'] ] - lexed = Ripper.lex("def m(a) nil end") + lexed = Ripper.lex("def m(a);nil end") lexed.map { |e| - e[0...-1] + [e[-1].to_s.split('|').map { |s| s.sub(/^EXPR_/, '') }.join('|')] + e[0...-1] + [e[-1].to_s] }.should == expected end end diff --git a/spec/ruby/library/socket/addrinfo/initialize_spec.rb b/spec/ruby/library/socket/addrinfo/initialize_spec.rb index 1f16531aaa4dea..c556bd758b925a 100644 --- a/spec/ruby/library/socket/addrinfo/initialize_spec.rb +++ b/spec/ruby/library/socket/addrinfo/initialize_spec.rb @@ -53,11 +53,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -83,11 +83,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -113,11 +113,11 @@ @addrinfo.ip_port.should == 25 end - it "returns the INET6 pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET6 end - it "returns the INET6 afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET6 end @@ -147,11 +147,11 @@ @addrinfo.ip_port.should == 
46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -217,11 +217,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -247,11 +247,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -311,11 +311,11 @@ @addrinfo.ip_port.should == 46102 end - it "returns the INET pfamily" do + it "returns the specified family" do @addrinfo.pfamily.should == Socket::PF_INET end - it "returns the INET afamily" do + it "returns the specified family" do @addrinfo.afamily.should == Socket::AF_INET end @@ -514,13 +514,13 @@ @sockaddr = Socket.sockaddr_in(80, '127.0.0.1') end - it 'returns an Addrinfo with :PF_INET family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, :PF_INET) addr.pfamily.should == Socket::PF_INET end - it 'returns an Addrinfo with :INET family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, :INET) addr.pfamily.should == Socket::PF_INET @@ -544,13 +544,13 @@ @sockaddr = Socket.sockaddr_in(80, '127.0.0.1') end - it 'returns an Addrinfo with "PF_INET" family' do + it 'returns an Addrinfo with the specified family' do addr = Addrinfo.new(@sockaddr, 'PF_INET') addr.pfamily.should == Socket::PF_INET end - it 'returns an Addrinfo with "INET" family' do + it 'returns an Addrinfo with the specified family' do addr = 
Addrinfo.new(@sockaddr, 'INET') addr.pfamily.should == Socket::PF_INET diff --git a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb index f2a6682f12b8ea..f2383513f286b2 100644 --- a/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_nonblock_spec.rb @@ -112,60 +112,30 @@ @server.close unless @server.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String on a closed stream socket" do - ready = false - - t = Thread.new do - client = @server.accept - - Thread.pass while !ready - begin - client.recv_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + it "returns nil on a closed stream socket" do + ready = false - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true + t = Thread.new do + client = @server.accept - t.value.should == "" - end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - ready = false - - t = Thread.new do - client = @server.accept - - Thread.pass while !ready - begin - client.recv_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recv_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end + ensure + client.close if client + end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true + socket = TCPSocket.new('127.0.0.1', @port) + socket.close + ready = true - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/basicsocket/recv_spec.rb b/spec/ruby/library/socket/basicsocket/recv_spec.rb index 
a51920f52a092a..7581f1bc1533fa 100644 --- a/spec/ruby/library/socket/basicsocket/recv_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recv_spec.rb @@ -184,42 +184,21 @@ @server.close unless @server.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recv(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - - t.value.should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + client.recv(10) + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recv(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close + socket = TCPSocket.new('127.0.0.1', @port) + socket.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb b/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb index b5fdd7c93bee8d..d1cde4411bd8bc 100644 --- a/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recvmsg_nonblock_spec.rb @@ -235,64 +235,31 @@ @server.close unless @server.closed? 
end - ruby_version_is ""..."3.3" do - platform_is_not :windows do # #recvmsg_nonblock() raises 'Errno::EINVAL: Invalid argument - recvmsg(2)' - it "returns an empty String as received data on a closed stream socket" do - ready = false + platform_is_not :windows do + it "returns nil on a closed stream socket" do + ready = false - t = Thread.new do - client = @server.accept + t = Thread.new do + client = @server.accept - Thread.pass while !ready - begin - client.recvmsg_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recvmsg_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true - - t.value.should.is_a? Array - t.value[0].should == "" + ensure + client.close if client end - end - end - ruby_version_is "3.3" do - platform_is_not :windows do - it "returns nil on a closed stream socket" do - ready = false + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - t = Thread.new do - client = @server.accept + socket = TCPSocket.new('127.0.0.1', @port) + socket.close + ready = true - Thread.pass while !ready - begin - client.recvmsg_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - ready = true - - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb b/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb index 04ba1d74c768c1..cfa0f4c61d476f 100644 --- a/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb +++ b/spec/ruby/library/socket/basicsocket/recvmsg_spec.rb @@ -208,46 +208,22 @@ @server.close unless @server.closed? 
end - ruby_version_is ""..."3.3" do - platform_is_not :windows do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recvmsg(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - socket = TCPSocket.new('127.0.0.1', @port) - socket.close - - t.value.should.is_a? Array - t.value[0].should == "" + platform_is_not :windows do + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + client.recvmsg(10) + ensure + client.close if client end - end - end - - ruby_version_is "3.3" do - platform_is_not :windows do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - client.recvmsg(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - socket = TCPSocket.new('127.0.0.1', @port) - socket.close + socket = TCPSocket.new('127.0.0.1', @port) + socket.close - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb b/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb index b58903df237b9d..5e6a145c9bdaeb 100644 --- a/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb +++ b/spec/ruby/library/socket/ipsocket/recvfrom_spec.rb @@ -83,43 +83,21 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client = @server.accept - message = client.recvfrom(10) - message - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - @client.close - - t.value.should.is_a? 
Array - t.value[0].should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client = @server.accept + message = client.recvfrom(10) + message + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client = @server.accept - message = client.recvfrom(10) - message - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.close + @client.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/socket/socket/getaddrinfo_spec.rb b/spec/ruby/library/socket/socket/getaddrinfo_spec.rb index 6576af52eeadc7..17ffeaccaf498b 100644 --- a/spec/ruby/library/socket/socket/getaddrinfo_spec.rb +++ b/spec/ruby/library/socket/socket/getaddrinfo_spec.rb @@ -107,22 +107,12 @@ res.each { |a| expected.should include(a) } end - ruby_version_is ""..."3.3" do - it "raises SocketError when fails to resolve address" do - -> { - Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") - }.should raise_error(SocketError) - end - end - - ruby_version_is "3.3" do - it "raises ResolutionError when fails to resolve address" do - -> { - Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") - }.should raise_error(Socket::ResolutionError) { |e| - [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) - } - end + it "raises ResolutionError when fails to resolve address" do + -> { + Socket.getaddrinfo("www.kame.net", 80, "AF_UNIX") + }.should raise_error(Socket::ResolutionError) { |e| + [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) + } end end end diff --git a/spec/ruby/library/socket/socket/getnameinfo_spec.rb b/spec/ruby/library/socket/socket/getnameinfo_spec.rb index af4a10c9c2baa5..48cc94bcd182ab 100644 --- a/spec/ruby/library/socket/socket/getnameinfo_spec.rb +++ 
b/spec/ruby/library/socket/socket/getnameinfo_spec.rb @@ -61,22 +61,12 @@ def should_be_valid_dns_name(name) name_info[1].should == 'discard' end - ruby_version_is ""..."3.3" do - it "raises SocketError when fails to resolve address" do - -> { - Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) - }.should raise_error(SocketError) - end - end - - ruby_version_is "3.3" do - it "raises ResolutionError when fails to resolve address" do - -> { - Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) - }.should raise_error(Socket::ResolutionError) { |e| - [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) - } - end + it "raises ResolutionError when fails to resolve address" do + -> { + Socket.getnameinfo(["AF_UNIX", 80, "0.0.0.0"]) + }.should raise_error(Socket::ResolutionError) { |e| + [Socket::EAI_FAMILY, Socket::EAI_FAIL].should.include?(e.error_code) + } end end diff --git a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb index 01b42bcc52b4fa..38a9f5ff5bc3fe 100644 --- a/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb +++ b/spec/ruby/library/socket/socket/recvfrom_nonblock_spec.rb @@ -158,61 +158,30 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - ready = false - - t = Thread.new do - client, _ = @server.accept - - Thread.pass while !ready - begin - client.recvfrom_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + it "returns nil on a closed stream socket" do + ready = false - @client.connect(@server_addr) - @client.close - ready = true - - t.value.should.is_a? 
Array - t.value[0].should == "" - end - end + t = Thread.new do + client, _ = @server.accept - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - ready = false - - t = Thread.new do - client, _ = @server.accept - - Thread.pass while !ready - begin - client.recvfrom_nonblock(10) - rescue IO::EAGAINWaitReadable - retry - end - ensure - client.close if client + Thread.pass while !ready + begin + client.recvfrom_nonblock(10) + rescue IO::EAGAINWaitReadable + retry end + ensure + client.close if client + end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.connect(@server_addr) - @client.close - ready = true + @client.connect(@server_addr) + @client.close + ready = true - t.value.should be_nil - end + t.value.should be_nil end end end diff --git a/spec/ruby/library/socket/socket/recvfrom_spec.rb b/spec/ruby/library/socket/socket/recvfrom_spec.rb index 6ba39ffcaf534c..cbbc162f6b0d28 100644 --- a/spec/ruby/library/socket/socket/recvfrom_spec.rb +++ b/spec/ruby/library/socket/socket/recvfrom_spec.rb @@ -111,43 +111,21 @@ @client.close unless @client.closed? end - ruby_version_is ""..."3.3" do - it "returns an empty String as received data on a closed stream socket" do - t = Thread.new do - client, _ = @server.accept - client.recvfrom(10) - ensure - client.close if client - end - - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil - - @client.connect(@server_addr) - @client.close - - t.value.should.is_a? 
Array - t.value[0].should == "" + it "returns nil on a closed stream socket" do + t = Thread.new do + client, _ = @server.accept + client.recvfrom(10) + ensure + client.close if client end - end - - ruby_version_is "3.3" do - it "returns nil on a closed stream socket" do - t = Thread.new do - client, _ = @server.accept - client.recvfrom(10) - ensure - client.close if client - end - Thread.pass while t.status and t.status != "sleep" - t.status.should_not be_nil + Thread.pass while t.status and t.status != "sleep" + t.status.should_not be_nil - @client.connect(@server_addr) - @client.close + @client.connect(@server_addr) + @client.close - t.value.should be_nil - end + t.value.should be_nil end end diff --git a/spec/ruby/library/stringscanner/named_captures_spec.rb b/spec/ruby/library/stringscanner/named_captures_spec.rb index a68d66c216a82e..927784a6c4a8a9 100644 --- a/spec/ruby/library/stringscanner/named_captures_spec.rb +++ b/spec/ruby/library/stringscanner/named_captures_spec.rb @@ -16,11 +16,9 @@ @s.named_captures.should == {} end - # https://github.com/ruby/strscan/issues/132 - ruby_bug "", ""..."3.3" do # fixed in strscan v3.0.7 - it "returns {} if there is no any matching done" do - @s.named_captures.should == {} - end + # https://github.com/ruby/strscan/issues/132 fixed in strscan v3.0.7 + it "returns {} if there is no any matching done" do + @s.named_captures.should == {} end it "returns nil for an optional named capturing group if it doesn't match" do diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index c14983c7ead703..734b5f125381db 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -745,4 +745,34 @@ ruby_exe(code, args: "2>&1", exit_status: 1).should.include?('too many encoding (> 256) (EncodingError)') end end + + describe "ONIGENC_IS_UNICODE" do + it "is true only for select UTF-related encodings" do + unicode = [ + Encoding::UTF_8, + 
Encoding::UTF8_DOCOMO, + Encoding::UTF8_KDDI, + Encoding::UTF8_MAC, + Encoding::UTF8_SOFTBANK, + Encoding::CESU_8, + Encoding::UTF_16LE, + Encoding::UTF_16BE, + Encoding::UTF_32LE, + Encoding::UTF_32BE + ] + unicode.each do |enc| + @s.should.ONIGENC_IS_UNICODE(enc) + end + + (Encoding.list - unicode).each { |enc| + @s.should_not.ONIGENC_IS_UNICODE(enc) + } + end + + # Redundant with the above but more explicit + it "is false for the dummy UTF-16 and UTF-32 encodings" do + @s.should_not.ONIGENC_IS_UNICODE(Encoding::UTF_16) + @s.should_not.ONIGENC_IS_UNICODE(Encoding::UTF_32) + end + end end diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index aa8662cfbd6426..98d4e2e3b772c8 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -324,6 +324,10 @@ static VALUE encoding_spec_rb_define_dummy_encoding(VALUE self, VALUE name) { return INT2NUM(rb_define_dummy_encoding(RSTRING_PTR(name))); } +static VALUE encoding_spec_ONIGENC_IS_UNICODE(VALUE self, VALUE encoding) { + return ONIGENC_IS_UNICODE(rb_to_encoding(encoding)) ? 
Qtrue : Qfalse; +} + void Init_encoding_spec(void) { VALUE cls; native_rb_encoding_pointer = (rb_encoding**) malloc(sizeof(rb_encoding*)); @@ -384,6 +388,7 @@ void Init_encoding_spec(void) { rb_define_method(cls, "ONIGENC_MBC_CASE_FOLD", encoding_spec_ONIGENC_MBC_CASE_FOLD, 1); rb_define_method(cls, "rb_enc_left_char_head", encoding_spec_rb_enc_left_char_head, 2); rb_define_method(cls, "rb_define_dummy_encoding", encoding_spec_rb_define_dummy_encoding, 1); + rb_define_method(cls, "ONIGENC_IS_UNICODE", encoding_spec_ONIGENC_IS_UNICODE, 1); } #ifdef __cplusplus diff --git a/spec/ruby/optional/capi/ext/kernel_spec.c b/spec/ruby/optional/capi/ext/kernel_spec.c index a8fed21b5900b6..eee324052d0936 100644 --- a/spec/ruby/optional/capi/ext/kernel_spec.c +++ b/spec/ruby/optional/capi/ext/kernel_spec.c @@ -1,4 +1,5 @@ #include "ruby.h" +#include "ruby/vm.h" #include "rubyspec.h" #include @@ -337,6 +338,15 @@ static VALUE kernel_spec_rb_set_end_proc(VALUE self, VALUE io) { return Qnil; } +static void at_exit_hook(ruby_vm_t *vm) { + puts("ruby_vm_at_exit hook ran"); +} + +static VALUE kernel_spec_ruby_vm_at_exit(VALUE self) { + ruby_vm_at_exit(at_exit_hook); + return self; +} + static VALUE kernel_spec_rb_f_sprintf(VALUE self, VALUE ary) { return rb_f_sprintf((int)RARRAY_LEN(ary), RARRAY_PTR(ary)); } @@ -434,6 +444,7 @@ void Init_kernel_spec(void) { rb_define_method(cls, "rb_yield_splat", kernel_spec_rb_yield_splat, 1); rb_define_method(cls, "rb_exec_recursive", kernel_spec_rb_exec_recursive, 1); rb_define_method(cls, "rb_set_end_proc", kernel_spec_rb_set_end_proc, 1); + rb_define_method(cls, "ruby_vm_at_exit", kernel_spec_ruby_vm_at_exit, 0); rb_define_method(cls, "rb_f_sprintf", kernel_spec_rb_f_sprintf, 1); rb_define_method(cls, "rb_str_format", kernel_spec_rb_str_format, 3); rb_define_method(cls, "rb_make_backtrace", kernel_spec_rb_make_backtrace, 0); diff --git a/spec/ruby/optional/capi/ext/string_spec.c b/spec/ruby/optional/capi/ext/string_spec.c index 
094013e049cbf6..74aa9e56e816fe 100644 --- a/spec/ruby/optional/capi/ext/string_spec.c +++ b/spec/ruby/optional/capi/ext/string_spec.c @@ -581,6 +581,14 @@ static VALUE string_spec_rb_str_to_interned_str(VALUE self, VALUE str) { return rb_str_to_interned_str(str); } +static VALUE string_spec_rb_interned_str(VALUE self, VALUE str, VALUE len) { + return rb_interned_str(RSTRING_PTR(str), FIX2LONG(len)); +} + +static VALUE string_spec_rb_interned_str_cstr(VALUE self, VALUE str) { + return rb_interned_str_cstr(RSTRING_PTR(str)); +} + void Init_string_spec(void) { VALUE cls = rb_define_class("CApiStringSpecs", rb_cObject); rb_define_method(cls, "rb_cstr2inum", string_spec_rb_cstr2inum, 2); @@ -681,6 +689,8 @@ void Init_string_spec(void) { rb_define_method(cls, "rb_enc_interned_str_cstr", string_spec_rb_enc_interned_str_cstr, 2); rb_define_method(cls, "rb_enc_interned_str", string_spec_rb_enc_interned_str, 3); rb_define_method(cls, "rb_str_to_interned_str", string_spec_rb_str_to_interned_str, 1); + rb_define_method(cls, "rb_interned_str", string_spec_rb_interned_str, 2); + rb_define_method(cls, "rb_interned_str_cstr", string_spec_rb_interned_str_cstr, 1); } #ifdef __cplusplus diff --git a/spec/ruby/optional/capi/io_spec.rb b/spec/ruby/optional/capi/io_spec.rb index ab7a7fc8f6f661..dc4ac3e3744ce8 100644 --- a/spec/ruby/optional/capi/io_spec.rb +++ b/spec/ruby/optional/capi/io_spec.rb @@ -494,166 +494,164 @@ end end - ruby_version_is "3.3" do - describe "rb_io_mode" do - it "returns the mode" do - (@o.rb_io_mode(@r_io) & 0b11).should == 0b01 - (@o.rb_io_mode(@w_io) & 0b11).should == 0b10 - (@o.rb_io_mode(@rw_io) & 0b11).should == 0b11 - end + describe "rb_io_mode" do + it "returns the mode" do + (@o.rb_io_mode(@r_io) & 0b11).should == 0b01 + (@o.rb_io_mode(@w_io) & 0b11).should == 0b10 + (@o.rb_io_mode(@rw_io) & 0b11).should == 0b11 end + end - describe "rb_io_path" do - it "returns the IO#path" do - @o.rb_io_path(@r_io).should == @r_io.path - @o.rb_io_path(@rw_io).should == 
@rw_io.path - @o.rb_io_path(@rw_io).should == @name - end + describe "rb_io_path" do + it "returns the IO#path" do + @o.rb_io_path(@r_io).should == @r_io.path + @o.rb_io_path(@rw_io).should == @rw_io.path + @o.rb_io_path(@rw_io).should == @name end + end - describe "rb_io_closed_p" do - it "returns false when io is not closed" do - @o.rb_io_closed_p(@r_io).should == false - @r_io.closed?.should == false - end + describe "rb_io_closed_p" do + it "returns false when io is not closed" do + @o.rb_io_closed_p(@r_io).should == false + @r_io.closed?.should == false + end - it "returns true when io is closed" do - @r_io.close + it "returns true when io is closed" do + @r_io.close - @o.rb_io_closed_p(@r_io).should == true - @r_io.closed?.should == true - end + @o.rb_io_closed_p(@r_io).should == true + @r_io.closed?.should == true end + end - quarantine! do # "Errno::EBADF: Bad file descriptor" at closing @r_io, @rw_io etc in the after :each hook - describe "rb_io_open_descriptor" do - it "creates a new IO instance" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.is_a?(IO) - end - - it "return an instance of the specified class" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.class.should == File + quarantine! 
do # "Errno::EBADF: Bad file descriptor" at closing @r_io, @rw_io etc in the after :each hook + describe "rb_io_open_descriptor" do + it "creates a new IO instance" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.is_a?(IO) + end - io = @o.rb_io_open_descriptor(IO, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.class.should == IO - end + it "return an instance of the specified class" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.class.should == File - it "sets the specified file descriptor" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.fileno.should == @r_io.fileno - end + io = @o.rb_io_open_descriptor(IO, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.class.should == IO + end - it "sets the specified path" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should == "a.txt" - end + it "sets the specified file descriptor" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.fileno.should == @r_io.fileno + end - it "sets the specified mode" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_BINMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.binmode? + it "sets the specified path" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should == "a.txt" + end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_TEXTMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should_not.binmode? - end + it "sets the specified mode" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_BINMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.binmode? 
- it "sets the specified timeout" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.timeout.should == 60 - end + io = @o.rb_io_open_descriptor(File, @r_io.fileno, CApiIOSpecs::FMODE_TEXTMODE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should_not.binmode? + end - it "sets the specified internal encoding" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.internal_encoding.should == Encoding::US_ASCII - end + it "sets the specified timeout" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.timeout.should == 60 + end - it "sets the specified external encoding" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.external_encoding.should == Encoding::UTF_8 - end + it "sets the specified internal encoding" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.internal_encoding.should == Encoding::US_ASCII + end - it "does not apply the specified encoding flags" do - name = tmp("rb_io_open_descriptor_specs") - File.write(name, "123\r\n456\n89") - file = File.open(name, "r") + it "sets the specified external encoding" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.external_encoding.should == Encoding::UTF_8 + end - io = @o.rb_io_open_descriptor(File, file.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", CApiIOSpecs::ECONV_UNIVERSAL_NEWLINE_DECORATOR, {}) - io.read_nonblock(20).should == "123\r\n456\n89" - ensure - file.close - rm_r name - end + it "does not apply the specified encoding flags" do + name = tmp("rb_io_open_descriptor_specs") + File.write(name, "123\r\n456\n89") + file = File.open(name, "r") - it "ignores the IO open options" do - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, 
{external_encoding: "windows-1251"}) - io.external_encoding.should == Encoding::UTF_8 + io = @o.rb_io_open_descriptor(File, file.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", CApiIOSpecs::ECONV_UNIVERSAL_NEWLINE_DECORATOR, {}) + io.read_nonblock(20).should == "123\r\n456\n89" + ensure + file.close + rm_r name + end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {internal_encoding: "windows-1251"}) - io.internal_encoding.should == Encoding::US_ASCII + it "ignores the IO open options" do + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {external_encoding: "windows-1251"}) + io.external_encoding.should == Encoding::UTF_8 - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {encoding: "windows-1251:binary"}) - io.external_encoding.should == Encoding::UTF_8 - io.internal_encoding.should == Encoding::US_ASCII + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {internal_encoding: "windows-1251"}) + io.internal_encoding.should == Encoding::US_ASCII - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {textmode: false}) - io.should_not.binmode? + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {encoding: "windows-1251:binary"}) + io.external_encoding.should == Encoding::UTF_8 + io.internal_encoding.should == Encoding::US_ASCII - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {binmode: true}) - io.should_not.binmode? + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {textmode: false}) + io.should_not.binmode? - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {autoclose: false}) - io.should.autoclose? 
+ io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {binmode: true}) + io.should_not.binmode? - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {path: "a.txt"}) - io.path.should == "a.txt" - end + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {autoclose: false}) + io.should.autoclose? - it "ignores the IO encoding options" do - io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_WRITABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {crlf_newline: true}) + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, "a.txt", 60, "US-ASCII", "UTF-8", 0, {path: "a.txt"}) + io.path.should == "a.txt" + end - io.write("123\r\n456\n89") - io.flush + it "ignores the IO encoding options" do + io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_WRITABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {crlf_newline: true}) - @r_io.read_nonblock(20).should == "123\r\n456\n89" - end + io.write("123\r\n456\n89") + io.flush - it "allows wrong mode" do - io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) - io.should.is_a?(File) + @r_io.read_nonblock(20).should == "123\r\n456\n89" + end - platform_is_not :windows do - -> { io.read_nonblock(1) }.should raise_error(Errno::EBADF) - end + it "allows wrong mode" do + io = @o.rb_io_open_descriptor(File, @w_io.fileno, CApiIOSpecs::FMODE_READABLE, "a.txt", 60, "US-ASCII", "UTF-8", 0, {}) + io.should.is_a?(File) - platform_is :windows do - -> { io.read_nonblock(1) }.should raise_error(IO::EWOULDBLOCKWaitReadable) - end + platform_is_not :windows do + -> { io.read_nonblock(1) }.should raise_error(Errno::EBADF) end - it "tolerates NULL as rb_io_encoding *encoding parameter" do - io = @o.rb_io_open_descriptor_without_encoding(File, @r_io.fileno, 0, "a.txt", 60) - io.should.is_a?(File) + platform_is :windows do + -> { io.read_nonblock(1) 
}.should raise_error(IO::EWOULDBLOCKWaitReadable) end + end - it "deduplicates path String" do - path = "a.txt".dup - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should_not equal(path) + it "tolerates NULL as rb_io_encoding *encoding parameter" do + io = @o.rb_io_open_descriptor_without_encoding(File, @r_io.fileno, 0, "a.txt", 60) + io.should.is_a?(File) + end - path = "a.txt".freeze - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should_not equal(path) - end + it "deduplicates path String" do + path = "a.txt".dup + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should_not equal(path) + + path = "a.txt".freeze + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io.path.should_not equal(path) + end - it "calls #to_str to convert a path to a String" do - path = Object.new - def path.to_str; "a.txt"; end + it "calls #to_str to convert a path to a String" do + path = Object.new + def path.to_str; "a.txt"; end - io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) + io = @o.rb_io_open_descriptor(File, @r_io.fileno, 0, path, 60, "US-ASCII", "UTF-8", 0, {}) - io.path.should == "a.txt" - end + io.path.should == "a.txt" end end end diff --git a/spec/ruby/optional/capi/kernel_spec.rb b/spec/ruby/optional/capi/kernel_spec.rb index 6633ee50c1f8ac..0a2362fb304ab8 100644 --- a/spec/ruby/optional/capi/kernel_spec.rb +++ b/spec/ruby/optional/capi/kernel_spec.rb @@ -703,6 +703,12 @@ def proc_caller end end + describe "ruby_vm_at_exit" do + it "runs a C function after the VM is terminated" do + ruby_exe("require #{kernel_path.inspect}; CApiKernelSpecs.new.ruby_vm_at_exit").should == "ruby_vm_at_exit hook ran\n" + end + end + describe "rb_f_sprintf" do it "returns a string according to format and arguments" do @s.rb_f_sprintf(["%d %f %s", 
10, 2.5, "test"]).should == "10 2.500000 test" diff --git a/spec/ruby/optional/capi/object_spec.rb b/spec/ruby/optional/capi/object_spec.rb index 8b4d8a9bba0e58..6716fd9e33766c 100644 --- a/spec/ruby/optional/capi/object_spec.rb +++ b/spec/ruby/optional/capi/object_spec.rb @@ -1004,7 +1004,6 @@ def reach it "calls the callback function for each cvar and ivar on a class" do exp = [:@@cvar, :foo, :@@cvar2, :bar, :@ivar, :baz] - exp.unshift(:__classpath__, 'CApiObjectSpecs::CVars') if RUBY_VERSION < "3.3" ary = @o.rb_ivar_foreach(CApiObjectSpecs::CVars) ary.should == exp @@ -1012,7 +1011,6 @@ def reach it "calls the callback function for each cvar and ivar on a module" do exp = [:@@mvar, :foo, :@@mvar2, :bar, :@ivar, :baz] - exp.unshift(:__classpath__, 'CApiObjectSpecs::MVars') if RUBY_VERSION < "3.3" ary = @o.rb_ivar_foreach(CApiObjectSpecs::MVars) ary.should == exp diff --git a/spec/ruby/optional/capi/spec_helper.rb b/spec/ruby/optional/capi/spec_helper.rb index e7abf46e6ccf65..d937c967d062fc 100644 --- a/spec/ruby/optional/capi/spec_helper.rb +++ b/spec/ruby/optional/capi/spec_helper.rb @@ -59,7 +59,11 @@ def compile_extension(name) tmpdir = tmp("cext_#{name}") Dir.mkdir(tmpdir) begin - ["#{core_ext_dir}/rubyspec.h", "#{spec_ext_dir}/#{ext}.c"].each do |file| + files = ["#{core_ext_dir}/rubyspec.h", "#{spec_ext_dir}/#{ext}.c"] + if spec_ext_dir != core_ext_dir + files += Dir.glob("#{spec_ext_dir}/*.h") + end + files.each do |file| if cxx and file.end_with?('.c') cp file, "#{tmpdir}/#{File.basename(file, '.c')}.cpp" else diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb index 72f20ee6a52455..889f0a6cfe5d51 100644 --- a/spec/ruby/optional/capi/string_spec.rb +++ b/spec/ruby/optional/capi/string_spec.rb @@ -1369,8 +1369,133 @@ def inspect result1.should_not.equal?(result2) end + it "preserves the encoding of the original string" do + result1 = @s.rb_str_to_interned_str("hello".dup.force_encoding(Encoding::US_ASCII)) + result2 
= @s.rb_str_to_interned_str("hello".dup.force_encoding(Encoding::UTF_8)) + result1.encoding.should == Encoding::US_ASCII + result2.encoding.should == Encoding::UTF_8 + end + it "returns the same string as String#-@" do @s.rb_str_to_interned_str("hello").should.equal?(-"hello") end end + + describe "rb_interned_str" do + it "returns a frozen string" do + str = "hello" + result = @s.rb_interned_str(str, str.bytesize) + result.should.is_a?(String) + result.should.frozen? + result.encoding.should == Encoding::US_ASCII + end + + it "returns the same frozen string" do + str = "hello" + result1 = @s.rb_interned_str(str, str.bytesize) + result2 = @s.rb_interned_str(str, str.bytesize) + result1.should.equal?(result2) + end + + it "supports strings with embedded null bytes" do + str = "foo\x00bar\x00baz".b + result = @s.rb_interned_str(str, str.bytesize) + result.should == str + end + + it "return US_ASCII encoding for an empty string" do + result = @s.rb_interned_str("", 0) + result.should == "" + result.encoding.should == Encoding::US_ASCII + end + + it "returns US_ASCII encoding for strings of only 7 bit ASCII" do + 0x00.upto(0x7f).each do |char| + result = @s.rb_interned_str(char.chr, 1) + result.encoding.should == Encoding::US_ASCII + end + end + + ruby_bug "21842", ""..."4.1" do + it "returns BINARY encoding for strings that use the 8th bit" do + 0x80.upto(0xff) do |char| + result = @s.rb_interned_str(char.chr, 1) + result.encoding.should == Encoding::BINARY + end + end + end + + it 'returns the same string when using non-ascii characters' do + str = 'こんにちは' + result1 = @s.rb_interned_str(str, str.bytesize) + result2 = @s.rb_interned_str(str, str.bytesize) + result1.should.equal?(result2) + end + + ruby_bug "21842", ""..."4.1" do + it "returns the same string as String#-@" do + str = "hello".dup.force_encoding(Encoding::US_ASCII) + @s.rb_interned_str(str, str.bytesize).should.equal?(-str) + end + end + end + + describe "rb_interned_str_cstr" do + it "returns a frozen 
string" do + str = "hello" + result = @s.rb_interned_str_cstr(str) + result.should.is_a?(String) + result.should.frozen? + result.encoding.should == Encoding::US_ASCII + end + + it "returns the same frozen string" do + str = "hello" + result1 = @s.rb_interned_str_cstr(str) + result2 = @s.rb_interned_str_cstr(str) + result1.should.equal?(result2) + end + + it "does not support strings with embedded null bytes" do + str = "foo\x00bar\x00baz".b + result = @s.rb_interned_str_cstr(str) + result.should == "foo" + end + + it "return US_ASCII encoding for an empty string" do + result = @s.rb_interned_str_cstr("") + result.should == "" + result.encoding.should == Encoding::US_ASCII + end + + it "returns US_ASCII encoding for strings of only 7 bit ASCII" do + 0x01.upto(0x7f).each do |char| + result = @s.rb_interned_str_cstr(char.chr) + result.encoding.should == Encoding::US_ASCII + end + end + + ruby_bug "21842", ""..."4.1" do + it "returns BINARY encoding for strings that use the 8th bit" do + 0x80.upto(0xff) do |char| + result = @s.rb_interned_str_cstr(char.chr) + result.encoding.should == Encoding::BINARY + end + end + end + + it 'returns the same string when using non-ascii characters' do + str = 'こんにちは' + result1 = @s.rb_interned_str_cstr(str) + result2 = @s.rb_interned_str_cstr(str) + result1.should.equal?(result2) + end + + ruby_bug "21842", ""..."4.1" do + it "returns the same string as String#-@" do + str = "hello".dup.force_encoding(Encoding::US_ASCII) + @s.rb_interned_str_cstr(str).should.equal?(-str) + end + end + end end diff --git a/spec/ruby/optional/capi/struct_spec.rb b/spec/ruby/optional/capi/struct_spec.rb index cc8d7f932e53b1..3f9eff52bc0b10 100644 --- a/spec/ruby/optional/capi/struct_spec.rb +++ b/spec/ruby/optional/capi/struct_spec.rb @@ -239,78 +239,76 @@ end end -ruby_version_is "3.3" do - describe "C-API Data function" do - before :all do - @s = CApiStructSpecs.new - @klass = @s.rb_data_define(nil, "a", "b", "c") - end - - describe "rb_data_define" 
do - it "returns a subclass of Data class when passed nil as the first argument" do - @klass.should.is_a? Class - @klass.superclass.should == Data - end - - it "returns a subclass of a class when passed as the first argument" do - superclass = Class.new(Data) - klass = @s.rb_data_define(superclass, "a", "b", "c") - - klass.should.is_a? Class - klass.superclass.should == superclass - end - - it "creates readers for the members" do - obj = @klass.new(1, 2, 3) - - obj.a.should == 1 - obj.b.should == 2 - obj.c.should == 3 - end - - it "returns the member names as Symbols" do - obj = @klass.new(0, 0, 0) - - obj.members.should == [:a, :b, :c] - end - - it "raises an ArgumentError if arguments contain duplicate member name" do - -> { @s.rb_data_define(nil, "a", "b", "a") }.should raise_error(ArgumentError) - end - - it "raises when first argument is not a class" do - -> { @s.rb_data_define([], "a", "b", "c") }.should raise_error(TypeError, "wrong argument type Array (expected Class)") - end - end - - describe "rb_struct_initialize" do - it "sets all members for a Data instance" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2, 3]).should == nil - data.a.should == 1 - data.b.should == 2 - data.c.should == 3 - end - - it "freezes the Data instance" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2, 3]).should == nil - data.should.frozen? 
- -> { @s.rb_struct_initialize(data, [1, 2, 3]) }.should raise_error(FrozenError) - end - - it "raises ArgumentError if too many values" do - data = @klass.allocate - -> { @s.rb_struct_initialize(data, [1, 2, 3, 4]) }.should raise_error(ArgumentError, "struct size differs") - end - - it "treats missing values as nil" do - data = @klass.allocate - @s.rb_struct_initialize(data, [1, 2]).should == nil - data.a.should == 1 - data.b.should == 2 - data.c.should == nil - end +describe "C-API Data function" do + before :all do + @s = CApiStructSpecs.new + @klass = @s.rb_data_define(nil, "a", "b", "c") + end + + describe "rb_data_define" do + it "returns a subclass of Data class when passed nil as the first argument" do + @klass.should.is_a? Class + @klass.superclass.should == Data + end + + it "returns a subclass of a class when passed as the first argument" do + superclass = Class.new(Data) + klass = @s.rb_data_define(superclass, "a", "b", "c") + + klass.should.is_a? Class + klass.superclass.should == superclass + end + + it "creates readers for the members" do + obj = @klass.new(1, 2, 3) + + obj.a.should == 1 + obj.b.should == 2 + obj.c.should == 3 + end + + it "returns the member names as Symbols" do + obj = @klass.new(0, 0, 0) + + obj.members.should == [:a, :b, :c] + end + + it "raises an ArgumentError if arguments contain duplicate member name" do + -> { @s.rb_data_define(nil, "a", "b", "a") }.should raise_error(ArgumentError) + end + + it "raises when first argument is not a class" do + -> { @s.rb_data_define([], "a", "b", "c") }.should raise_error(TypeError, "wrong argument type Array (expected Class)") + end + end + + describe "rb_struct_initialize" do + it "sets all members for a Data instance" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2, 3]).should == nil + data.a.should == 1 + data.b.should == 2 + data.c.should == 3 + end + + it "freezes the Data instance" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2, 3]).should == nil + 
data.should.frozen? + -> { @s.rb_struct_initialize(data, [1, 2, 3]) }.should raise_error(FrozenError) + end + + it "raises ArgumentError if too many values" do + data = @klass.allocate + -> { @s.rb_struct_initialize(data, [1, 2, 3, 4]) }.should raise_error(ArgumentError, "struct size differs") + end + + it "treats missing values as nil" do + data = @klass.allocate + @s.rb_struct_initialize(data, [1, 2]).should == nil + data.a.should == 1 + data.b.should == 2 + data.c.should == nil end end end diff --git a/spec/ruby/security/cve_2020_10663_spec.rb b/spec/ruby/security/cve_2020_10663_spec.rb index c44a13a0dd4b5d..7f42c407420b46 100644 --- a/spec/ruby/security/cve_2020_10663_spec.rb +++ b/spec/ruby/security/cve_2020_10663_spec.rb @@ -21,7 +21,7 @@ def to_json(*args) guard -> { JSON.const_defined?(:Pure) or - version_is(JSON::VERSION, '2.3.0') + version_is(JSON::VERSION, '2.3.0'...'2.11.0') } do describe "CVE-2020-10663 is resisted by" do it "only creating custom objects if passed create_additions: true or using JSON.load" do diff --git a/spec/ruby/shared/kernel/at_exit.rb b/spec/ruby/shared/kernel/at_exit.rb index 29db79bb391428..d57ab73920f3fa 100644 --- a/spec/ruby/shared/kernel/at_exit.rb +++ b/spec/ruby/shared/kernel/at_exit.rb @@ -60,10 +60,7 @@ result = ruby_exe('{', options: "-r#{script}", args: "2>&1", exit_status: 1) $?.should_not.success? 
result.should.include?("handler ran\n") - - # it's tempting not to rely on error message and rely only on exception class name, - # but CRuby before 3.2 doesn't print class name for syntax error - result.should include_any_of("syntax error", "SyntaxError") + result.should include("SyntaxError") end it "calls the nested handler right after the outer one if a handler is nested into another handler" do diff --git a/spec/ruby/shared/queue/freeze.rb b/spec/ruby/shared/queue/freeze.rb index 4c506a42355f62..5dedd005df4975 100644 --- a/spec/ruby/shared/queue/freeze.rb +++ b/spec/ruby/shared/queue/freeze.rb @@ -1,18 +1,8 @@ describe :queue_freeze, shared: true do - ruby_version_is ""..."3.3" do - it "can be frozen" do - queue = @object.call + it "raises an exception when freezing" do + queue = @object.call + -> { queue.freeze - queue.should.frozen? - end - end - - ruby_version_is "3.3" do - it "raises an exception when freezing" do - queue = @object.call - -> { - queue.freeze - }.should raise_error(TypeError, "cannot freeze #{queue}") - end + }.should raise_error(TypeError, "cannot freeze #{queue}") end end diff --git a/spec/ruby/shared/string/start_with.rb b/spec/ruby/shared/string/start_with.rb index 4b947a3bbf0ea8..9592eda4d43d31 100644 --- a/spec/ruby/shared/string/start_with.rb +++ b/spec/ruby/shared/string/start_with.rb @@ -70,15 +70,7 @@ $1.should be_nil end - ruby_version_is ""..."3.3" do - it "does not check that we are not matching part of a character" do - "\xC3\xA9".send(@method).should.start_with?("\xC3") - end - end - - ruby_version_is "3.3" do # #19784 - it "checks that we are not matching part of a character" do - "\xC3\xA9".send(@method).should_not.start_with?("\xC3") - end + it "checks that we are not matching part of a character" do + "\xC3\xA9".send(@method).should_not.start_with?("\xC3") end end From c8f01d599b962a7ae0183795162cbdfd9ee7aeb3 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 28 Jan 2026 22:31:09 +0100 Subject: [PATCH 42/77] 
ruby/spec no longer supports 3.2 --- .github/workflows/spec_guards.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/spec_guards.yml b/.github/workflows/spec_guards.yml index 856d6f61eb9303..0a5104f1419298 100644 --- a/.github/workflows/spec_guards.yml +++ b/.github/workflows/spec_guards.yml @@ -39,7 +39,6 @@ jobs: # Specs from ruby/spec should still run on all supported Ruby versions. # This also ensures the needed ruby_version_is guards are there, see spec/README.md. ruby: - - ruby-3.2 - ruby-3.3 - ruby-3.4 - ruby-4.0 From 554ca2eb622d1cfe886b419a54293d231a6c19f7 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 27 Jan 2026 21:05:57 -0500 Subject: [PATCH 43/77] [DOC] Fix broken link in Coverage.setup --- ext/coverage/coverage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/coverage/coverage.c b/ext/coverage/coverage.c index 93acdb24806f78..41f33f4fb8c9f3 100644 --- a/ext/coverage/coverage.c +++ b/ext/coverage/coverage.c @@ -70,7 +70,7 @@ rb_coverage_supported(VALUE self, VALUE _mode) * If +lines+ is enabled, +oneshot_lines+ cannot be enabled. * See {Lines Coverage}[rdoc-ref:Coverage@Lines+Coverage]. * - +branches+: Enables branch coverage that records the number of times each - * branch in each conditional was executed. See {Branches Coverage}[rdoc-ref:Coverage@Branch+Coverage]. + * branch in each conditional was executed. See {Branches Coverage}[rdoc-ref:Coverage@Branches+Coverage]. * - +methods+: Enables method coverage that records the number of times each method was exectued. * See {Methods Coverage}[rdoc-ref:Coverage@Methods+Coverage]. * - +eval+: Enables coverage for evaluations (e.g. Kernel#eval, Module#class_eval). 
From f2fde274506fa1731e576d9fca237764103b56db Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 27 Jan 2026 18:28:44 +0900 Subject: [PATCH 44/77] [ruby/rubygems] Only use parent source with Git and Path sources https://github.com/ruby/rubygems/commit/c5da276610 --- lib/bundler/definition.rb | 2 +- spec/bundler/install/gemfile/sources_spec.rb | 41 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/lib/bundler/definition.rb b/lib/bundler/definition.rb index 5ab577f504c39d..639740e46b6b04 100644 --- a/lib/bundler/definition.rb +++ b/lib/bundler/definition.rb @@ -1077,7 +1077,7 @@ def converge_specs(specs) end end - if parent_dep + if parent_dep && parent_dep.source.is_a?(Source::Path) replacement_source = parent_dep.source else replacement_source = sources.get(lockfile_source) diff --git a/spec/bundler/install/gemfile/sources_spec.rb b/spec/bundler/install/gemfile/sources_spec.rb index 90f87ed0c5daea..69b0816a18999e 100644 --- a/spec/bundler/install/gemfile/sources_spec.rb +++ b/spec/bundler/install/gemfile/sources_spec.rb @@ -1195,4 +1195,45 @@ expect(gem_section).not_to include("activerecord (7.0.0)") end end + + context "when a scoped rubygems source is missing a transitive dependency" do + before do + build_repo2 do + build_gem "fallback_dep", "1.0.0" + build_gem "foo", "1.0.0" + end + + build_repo3 do + build_gem "private_parent", "1.0.0" do |s| + s.add_dependency "fallback_dep" + end + end + + gemfile <<-G + source "https://gem.repo2" + + gem "foo" + + source "https://gem.repo3" do + gem "private_parent", "1.0.0" + end + G + + bundle :install, artifice: "compact_index" + end + + it "falls back to the default rubygems source for that dependency" do + build_repo2 do + build_gem "foo", "2.0.0" + end + + system_gems [] + + bundle "update foo", artifice: "compact_index" + + expect(the_bundle).to include_gems("private_parent 1.0.0", "fallback_dep 1.0.0", "foo 2.0.0") + expect(the_bundle).to include_gems("private_parent 1.0.0", 
source: "remote3") + expect(the_bundle).to include_gems("fallback_dep 1.0.0", source: "remote2") + end + end end From 0c30897d0bf579ee7be08fc828932e1bac1196aa Mon Sep 17 00:00:00 2001 From: Andrii Furmanets Date: Sun, 23 Nov 2025 19:19:22 +0200 Subject: [PATCH 45/77] [ruby/rubygems] Remove outdated TODO in RemoteFetcher https://github.com/ruby/rubygems/commit/cc81b8b228 --- lib/rubygems/remote_fetcher.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rubygems/remote_fetcher.rb b/lib/rubygems/remote_fetcher.rb index 805f7aaf82ed1a..151c6fd4d8bbdf 100644 --- a/lib/rubygems/remote_fetcher.rb +++ b/lib/rubygems/remote_fetcher.rb @@ -174,7 +174,7 @@ def download(spec, source_uri, install_dir = Gem.dir) end verbose "Using local gem #{local_gem_path}" - when nil then # TODO: test for local overriding cache + when nil then source_path = if Gem.win_platform? && source_uri.scheme && !source_uri.path.include?(":") "#{source_uri.scheme}:#{source_uri.path}" From fa09afb15c9cf901d84e2963b86c4c7a7d0e104e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Jan 2026 21:23:15 -0500 Subject: [PATCH 46/77] [ruby/prism] Support `version: "nearest"`. This clamps to supported versions based on the current Ruby version. https://github.com/ruby/prism/commit/eb63748e8b --- lib/prism/ffi.rb | 24 ++++++++++++++++++++---- prism/extension.c | 27 ++++++++++++++++++++++----- prism/options.h | 8 +++++++- 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index d4c9d60c9aa2a4..57d878a33fa299 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -423,10 +423,26 @@ def dump_options_command_line(options) # Return the value that should be dumped for the version option. 
def dump_options_version(version) - current = version == "current" + checking = + case version + when "current" + RUBY_VERSION + when "latest" + nil + when "nearest" + if RUBY_VERSION <= "3.3" + "3.3" + elsif RUBY_VERSION >= "4.1" + "4.1" + else + RUBY_VERSION + end + else + version + end - case current ? RUBY_VERSION : version - when nil, "latest" + case checking + when nil 0 # Handled in pm_parser_init when /\A3\.3(\.\d+)?\z/ 1 @@ -437,7 +453,7 @@ def dump_options_version(version) when /\A4\.1(\.\d+)?\z/ 4 else - if current + if version == "current" raise CurrentVersionError, RUBY_VERSION else raise ArgumentError, "invalid version: #{version}" diff --git a/prism/extension.c b/prism/extension.c index 400546a4ce0364..cde10bf360df2a 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -201,9 +201,24 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { const char *version = check_string(value); if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) { - const char *current_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); - if (!pm_options_version_set(options, current_version, 3)) { - rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, current_version)); + const char *ruby_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); + if (!pm_options_version_set(options, ruby_version, 3)) { + rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, ruby_version)); + } + } else if (RSTRING_LEN(value) == 7 && strncmp(version, "nearest", 7) == 0) { + const char *ruby_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION"))); + const char *nearest_version; + + if (ruby_version[0] < '3' || (ruby_version[0] == '3' && ruby_version[2] < '3')) { + nearest_version = "3.3"; + } else if (ruby_version[0] > '4' || (ruby_version[0] == '4' && ruby_version[2] > '1')) { + nearest_version = "4.1"; + } else { + nearest_version = ruby_version; + } + + if (!pm_options_version_set(options, 
nearest_version, 3)) { + rb_raise(rb_eArgError, "invalid nearest version: %s", nearest_version); } } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) { rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value); @@ -894,8 +909,10 @@ parse_input(pm_string_t *input, const pm_options_t *options) { * version of Ruby syntax (which you can trigger with `nil` or * `"latest"`). You may also restrict the syntax to a specific version of * Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that - * the current Ruby is running use `version: "current"`. Raises - * ArgumentError if the version is not currently supported by Prism. + * the current Ruby is running use `version: "current"`. To parse with the + * nearest version to the current Ruby that is running, use + * `version: "nearest"`. Raises ArgumentError if the version is not + * currently supported by Prism. */ static VALUE parse(int argc, VALUE *argv, VALUE self) { diff --git a/prism/options.h b/prism/options.h index c00c7bf7553a4f..9a19a2aeadf31a 100644 --- a/prism/options.h +++ b/prism/options.h @@ -82,7 +82,10 @@ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const * parse in the same way as a specific version of CRuby would have. */ typedef enum { - /** If an explicit version is not provided, the current version of prism will be used. */ + /** + * If an explicit version is not provided, the current version of prism will + * be used. + */ PM_OPTIONS_VERSION_UNSET = 0, /** The vendored version of prism in CRuby 3.3.x. 
*/ @@ -452,6 +455,9 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * | ----- | ------------------------- | * | `0` | use the latest version of prism | * | `1` | use the version of prism that is vendored in CRuby 3.3.0 | + * | `2` | use the version of prism that is vendored in CRuby 3.4.0 | + * | `3` | use the version of prism that is vendored in CRuby 4.0.0 | + * | `4` | use the version of prism that is vendored in CRuby 4.1.0 | * * Each scope is laid out as follows: * From 40e3e43b9380aa339b16471c7e9d9de6894f5ab3 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Tue, 16 Dec 2025 20:38:15 +0000 Subject: [PATCH 47/77] [ruby/net-http] [DOC] Fix links in requests.rb https://github.com/ruby/net-http/commit/a232aea2fc --- lib/net/http/requests.rb | 48 ++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/lib/net/http/requests.rb b/lib/net/http/requests.rb index 939d413f91961c..8dc79a9f665d52 100644 --- a/lib/net/http/requests.rb +++ b/lib/net/http/requests.rb @@ -19,9 +19,9 @@ # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -52,9 +52,9 @@ class Net::HTTP::Get < Net::HTTPRequest # # - Request body: optional. # - Response body: no. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. 
-# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -87,9 +87,9 @@ class Net::HTTP::Head < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: no. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: yes. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: no. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: yes. # # Related: # @@ -123,9 +123,9 @@ class Net::HTTP::Post < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -157,9 +157,9 @@ class Net::HTTP::Put < Net::HTTPRequest # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. 
-# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -189,9 +189,9 @@ class Net::HTTP::Delete < Net::HTTPRequest # # - Request body: optional. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -221,9 +221,9 @@ class Net::HTTP::Options < Net::HTTPRequest # # - Request body: no. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: yes. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: yes. -# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: yes. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: yes. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # @@ -256,9 +256,9 @@ class Net::HTTP::Trace < Net::HTTPRequest # # - Request body: yes. # - Response body: yes. -# - {Safe}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Safe_methods]: no. -# - {Idempotent}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Idempotent_methods]: no. 
-# - {Cacheable}[https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Cacheable_methods]: no. +# - {Safe}[https://en.wikipedia.org/wiki/HTTP#Safe_method]: no. +# - {Idempotent}[https://en.wikipedia.org/wiki/HTTP#Idempotent_method]: no. +# - {Cacheable}[https://en.wikipedia.org/wiki/HTTP#Cacheable_method]: no. # # Related: # From 6f16e87ff06c412edc0cac334b3c078aa0fafa22 Mon Sep 17 00:00:00 2001 From: Masafumi Koba <473530+ybiquitous@users.noreply.github.com> Date: Fri, 16 Jan 2026 18:23:21 +0900 Subject: [PATCH 48/77] [ruby/open-uri] Improve URI.open documentation with usage example This improves the `URI.open` method documentation by adding a code example requiring `open-uri` as a basic usage. When reading the current documentation first, I didn't realize that `open-uri` was required to call the method. I believe the improved version could be more helpful for new users. ```sh-session $ ruby -r uri -e 'p URI.open("http://example.com")' -e:1:in '
': private method 'open' called for module URI (NoMethodError) ``` Ref https://docs.ruby-lang.org/en/master/URI.html#method-c-open Also, this improves formatting with code fonts for better readability. https://github.com/ruby/open-uri/commit/f4400edc27 --- lib/open-uri.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/open-uri.rb b/lib/open-uri.rb index 5983c7368b1d35..844865b13ac0a1 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -4,22 +4,25 @@ require 'time' module URI - # Allows the opening of various resources including URIs. + # Allows the opening of various resources including URIs. Example: # - # If the first argument responds to the 'open' method, 'open' is called on + # require "open-uri" + # URI.open("http://example.com") { |f| f.read } + # + # If the first argument responds to the +open+ method, +open+ is called on # it with the rest of the arguments. # # If the first argument is a string that begins with (protocol)://, it is parsed by - # URI.parse. If the parsed object responds to the 'open' method, - # 'open' is called on it with the rest of the arguments. + # URI.parse. If the parsed object responds to the +open+ method, + # +open+ is called on it with the rest of the arguments. # # Otherwise, Kernel#open is called. # # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and # URI::FTP#open, Kernel#open. # - # We can accept URIs and strings that begin with http://, https:// and - # ftp://. In these cases, the opened file object is extended by OpenURI::Meta. + # We can accept URIs and strings that begin with http://, https:// and + # ftp://. In these cases, the opened file object is extended by OpenURI::Meta. 
def self.open(name, *rest, &block) if name.respond_to?(:open) name.open(*rest, &block) From ba8b0b3a9c867421166083c2eb35204c616beb4b Mon Sep 17 00:00:00 2001 From: git Date: Thu, 29 Jan 2026 07:04:29 +0000 Subject: [PATCH 49/77] Update bundled gems list as of 2026-01-29 --- NEWS.md | 1 + gems/bundled_gems | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 38ca8667a87ecb..9a1dd9c9337587 100644 --- a/NEWS.md +++ b/NEWS.md @@ -62,6 +62,7 @@ releases. * debug 1.11.1 * mutex_m 0.3.0 * resolv-replace 0.2.0 +* syslog 0.4.0 * rdoc 7.1.0 ### RubyGems and Bundler diff --git a/gems/bundled_gems b/gems/bundled_gems index 98a6ca2cea8ea2..e85061bc0f842e 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -32,7 +32,7 @@ resolv-replace 0.2.0 https://github.com/ruby/resolv-replace rinda 0.2.0 https://github.com/ruby/rinda drb 2.2.3 https://github.com/ruby/drb nkf 0.2.0 https://github.com/ruby/nkf -syslog 0.3.0 https://github.com/ruby/syslog +syslog 0.4.0 https://github.com/ruby/syslog csv 3.3.5 https://github.com/ruby/csv repl_type_completor 0.1.12 https://github.com/ruby/repl_type_completor 26b8e964557690c0b539cff8940bcfb1591f1fe6 ostruct 0.6.3 https://github.com/ruby/ostruct From cd26647f537e6c3d1ab3fe5675df47fdbddf8d48 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 29 Jan 2026 15:30:10 +0900 Subject: [PATCH 50/77] Remove dangling taintedness macros --- include/ruby/internal/fl_type.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/ruby/internal/fl_type.h b/include/ruby/internal/fl_type.h index 2afb3f1fa348f2..e3878d9ed7d567 100644 --- a/include/ruby/internal/fl_type.h +++ b/include/ruby/internal/fl_type.h @@ -108,8 +108,6 @@ #define RB_OBJ_FREEZE_RAW RB_OBJ_FREEZE_RAW #define RB_OBJ_FROZEN RB_OBJ_FROZEN #define RB_OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW -#define RB_OBJ_UNTRUST RB_OBJ_TAINT -#define RB_OBJ_UNTRUSTED RB_OBJ_TAINTED /** @endcond */ /** @@ -134,15 +132,6 @@ #define OBJ_FREEZE_RAW 
RB_OBJ_FREEZE_RAW /**< @old{RB_OBJ_FREEZE_RAW} */ #define OBJ_FROZEN RB_OBJ_FROZEN /**< @old{RB_OBJ_FROZEN} */ #define OBJ_FROZEN_RAW RB_OBJ_FROZEN_RAW /**< @old{RB_OBJ_FROZEN_RAW} */ -#define OBJ_INFECT RB_OBJ_INFECT /**< @old{RB_OBJ_INFECT} */ -#define OBJ_INFECT_RAW RB_OBJ_INFECT_RAW /**< @old{RB_OBJ_INFECT_RAW} */ -#define OBJ_TAINT RB_OBJ_TAINT /**< @old{RB_OBJ_TAINT} */ -#define OBJ_TAINTABLE RB_OBJ_TAINTABLE /**< @old{RB_OBJ_TAINT_RAW} */ -#define OBJ_TAINTED RB_OBJ_TAINTED /**< @old{RB_OBJ_TAINTED} */ -#define OBJ_TAINTED_RAW RB_OBJ_TAINTED_RAW /**< @old{RB_OBJ_TAINTED_RAW} */ -#define OBJ_TAINT_RAW RB_OBJ_TAINT_RAW /**< @old{RB_OBJ_TAINT_RAW} */ -#define OBJ_UNTRUST RB_OBJ_UNTRUST /**< @old{RB_OBJ_TAINT} */ -#define OBJ_UNTRUSTED RB_OBJ_UNTRUSTED /**< @old{RB_OBJ_TAINTED} */ /** @} */ /** From 62493b572d2b6a6984cbbdc32db942dbf894d056 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 29 Jan 2026 12:18:26 +0900 Subject: [PATCH 51/77] Fix assertions not to be affected by the default encoding --- spec/ruby/core/io/buffer/map_spec.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb index d980eb0ae0451a..80ee65125ee170 100644 --- a/spec/ruby/core/io/buffer/map_spec.rb +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -12,11 +12,11 @@ end def open_fixture - File.open("#{__dir__}/../fixtures/read_text.txt", "r+") + File.open("#{__dir__}/../fixtures/read_text.txt", "rb+") end def open_big_file_fixture - File.open(@big_file_name, "r+") + File.open(@big_file_name, "rb+") end after :each do @@ -308,11 +308,11 @@ def open_big_file_fixture @buffer.set_string("test12345") @buffer.get_string.should == "test12345".b - @file.read.should == "abcâdef\n" + @file.read.should == "abcâdef\n".b end it "allows mapping read-only files and modifying the buffer" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + @file = 
File.open("#{__dir__}/../fixtures/read_text.txt", "rb") @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) @buffer.should.private? @@ -323,7 +323,7 @@ def open_big_file_fixture @buffer.set_string("test12345") @buffer.get_string.should == "test12345".b - @file.read.should == "abcâdef\n" + @file.read.should == "abcâdef\n".b end platform_is_not :windows do From d3ea389dec2914e8c247f83091ccea922aadae80 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 29 Jan 2026 12:23:10 +0900 Subject: [PATCH 52/77] Remove too platform sensitive assertions --- spec/ruby/core/io/buffer/map_spec.rb | 36 ++++------------------------ 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb index 80ee65125ee170..7ab08fd425d582 100644 --- a/spec/ruby/core/io/buffer/map_spec.rb +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -89,24 +89,17 @@ def open_big_file_fixture end context "with an empty file" do - ruby_version_is ""..."4.0" do - it "raises a SystemCallError" do - @file = File.open("#{__dir__}/../fixtures/empty.txt", "r+") - -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) - end - end - ruby_version_is "4.0" do it "raises ArgumentError" do - @file = File.open("#{__dir__}/../fixtures/empty.txt", "r+") + @file = File.open("#{__dir__}/../fixtures/empty.txt", "rb+") -> { IO::Buffer.map(@file) }.should raise_error(ArgumentError, "Invalid negative or zero file size!") end end end context "with a file opened only for reading" do - it "raises a SystemCallError if no flags are used" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + it "raises a SystemCallError unless read-only" do + @file = File.open("#{__dir__}/../fixtures/read_text.txt", "rb") -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) end end @@ -128,15 +121,6 @@ def open_big_file_fixture end context "if size is 0" do - ruby_version_is ""..."4.0" do - platform_is_not :windows do - it 
"raises a SystemCallError" do - @file = open_fixture - -> { IO::Buffer.map(@file, 0) }.should raise_error(SystemCallError) - end - end - end - ruby_version_is "4.0" do it "raises ArgumentError" do @file = open_fixture @@ -247,18 +231,6 @@ def open_big_file_fixture -> { IO::Buffer.map(@file, 4, nil) }.should raise_error(TypeError, /no implicit conversion/) end - it "raises a SystemCallError if offset is not an allowed value" do - @file = open_fixture - -> { IO::Buffer.map(@file, 4, 3) }.should raise_error(SystemCallError) - end - - ruby_version_is ""..."4.0" do - it "raises a SystemCallError if offset is negative" do - @file = open_fixture - -> { IO::Buffer.map(@file, 4, -1) }.should raise_error(SystemCallError) - end - end - ruby_version_is "4.0" do it "raises ArgumentError if offset is negative" do @file = open_fixture @@ -279,7 +251,7 @@ def open_big_file_fixture end it "allows mapping read-only files" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "r") + @file = File.open("#{__dir__}/../fixtures/read_text.txt", "rb") @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) @buffer.should.readonly? 
From e48ed4b979b5c9105238011707465c84a2154060 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 29 Jan 2026 13:33:32 +0900 Subject: [PATCH 53/77] Do not open to write the fixture files --- spec/ruby/core/io/buffer/map_spec.rb | 31 ++++++++++++++++++------- spec/ruby/core/io/buffer/shared_spec.rb | 9 +++++-- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb index 7ab08fd425d582..715aa9d34400ba 100644 --- a/spec/ruby/core/io/buffer/map_spec.rb +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -2,20 +2,32 @@ describe "IO::Buffer.map" do before :all do - @big_file_name = tmp("big_file") - # Usually 4 kibibytes + 16 bytes - File.write(@big_file_name, "12345678" * (IO::Buffer::PAGE_SIZE / 8 + 2)) + @tmp_files = [] + + @big_file_name = nil + @small_file_name = nil end after :all do - File.delete(@big_file_name) + @tmp_files.each {|file| File.delete(file)} end def open_fixture - File.open("#{__dir__}/../fixtures/read_text.txt", "rb+") + unless @small_file_name + @small_file_name = tmp("read_text.txt") + File.copy_stream(fixture(__dir__, "read_text.txt"), @small_file_name) + @tmp_files << @small_file_name + end + File.open(@small_file_name, "rb+") end def open_big_file_fixture + unless @big_file_name + @big_file_name = tmp("big_file") + # Usually 4 kibibytes + 16 bytes + File.write(@big_file_name, "12345678" * (IO::Buffer::PAGE_SIZE / 8 + 2)) + @tmp_files << @big_file_name + end File.open(@big_file_name, "rb+") end @@ -91,7 +103,8 @@ def open_big_file_fixture context "with an empty file" do ruby_version_is "4.0" do it "raises ArgumentError" do - @file = File.open("#{__dir__}/../fixtures/empty.txt", "rb+") + file_name = tmp("empty.txt") + @file = File.open(file_name, "wb+") -> { IO::Buffer.map(@file) }.should raise_error(ArgumentError, "Invalid negative or zero file size!") end end @@ -99,7 +112,7 @@ def open_big_file_fixture context "with a file opened only for reading" do it 
"raises a SystemCallError unless read-only" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "rb") + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") -> { IO::Buffer.map(@file) }.should raise_error(SystemCallError) end end @@ -251,7 +264,7 @@ def open_big_file_fixture end it "allows mapping read-only files" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "rb") + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::READONLY) @buffer.should.readonly? @@ -284,7 +297,7 @@ def open_big_file_fixture end it "allows mapping read-only files and modifying the buffer" do - @file = File.open("#{__dir__}/../fixtures/read_text.txt", "rb") + @file = File.open(fixture(__dir__, "read_text.txt"), "rb") @buffer = IO::Buffer.map(@file, nil, 0, IO::Buffer::PRIVATE) @buffer.should.private? diff --git a/spec/ruby/core/io/buffer/shared_spec.rb b/spec/ruby/core/io/buffer/shared_spec.rb index 4f3bce5448fee0..be8c29471af880 100644 --- a/spec/ruby/core/io/buffer/shared_spec.rb +++ b/spec/ruby/core/io/buffer/shared_spec.rb @@ -12,10 +12,15 @@ end it "is true for a non-private buffer created with .map" do - file = File.open("#{__dir__}/../fixtures/read_text.txt", "r+") + path = tmp("read_text.txt") + File.copy_stream(fixture(__dir__, "read_text.txt"), path) + file = File.open(path, "r+") @buffer = IO::Buffer.map(file) - file.close @buffer.shared?.should be_true + ensure + @buffer.free + file.close + File.unlink(path) end it "is false for an unshared buffer" do From d3d1a97486136801f9a37d959f32652385ebc741 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Thu, 29 Jan 2026 11:13:19 +0100 Subject: [PATCH 54/77] Update to ruby/spec@8c98a84 --- spec/ruby/core/io/buffer/map_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/ruby/core/io/buffer/map_spec.rb b/spec/ruby/core/io/buffer/map_spec.rb index 715aa9d34400ba..d60036307f8774 100644 --- a/spec/ruby/core/io/buffer/map_spec.rb +++ 
b/spec/ruby/core/io/buffer/map_spec.rb @@ -105,6 +105,7 @@ def open_big_file_fixture it "raises ArgumentError" do file_name = tmp("empty.txt") @file = File.open(file_name, "wb+") + @tmp_files << file_name -> { IO::Buffer.map(@file) }.should raise_error(ArgumentError, "Invalid negative or zero file size!") end end From 6d07151eb141daffa0f36149dcff1cc044c37893 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 28 Jan 2026 15:55:05 +0900 Subject: [PATCH 55/77] Win32: Mark batch files in gitattributes --- .gitattributes | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitattributes b/.gitattributes index 6ac6e6fcc3f579..f98c091e3f0464 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,3 +7,8 @@ tool/update-deps diff=ruby tool/make-snapshot diff=ruby tool/format-release diff=ruby tool/leaked-globals diff=ruby + +# To strip CR from the batch files, set the `diff.dos.textconv` filter +# like as `git config diff.dos.textconv $'sed \'s/\r$//\''`. +*.bat diff=dos +*.cmd diff=dos From 1cc2e68fdc7cb85e5a581a96559fe00e86421998 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Fri, 22 Nov 2024 10:19:46 +0900 Subject: [PATCH 56/77] Win32: Refactor configure * Enable double-quoted options with an `=` sign. * Replace `$` with `$$` in the batch file without CPP. * Support for `--with-destdir`. * Allow Makefile macro definition. 
(Close GH-15935) --- configure.ac | 7 +- win32/configure.bat | 370 ++++++++++++++++++-------------------------- win32/enc-setup.mak | 4 +- win32/setup.mak | 2 +- 4 files changed, 160 insertions(+), 223 deletions(-) diff --git a/configure.ac b/configure.ac index 4e7367804d3d6b..2d9ccf1442920e 100644 --- a/configure.ac +++ b/configure.ac @@ -743,6 +743,12 @@ AS_CASE(["$GCC:${warnflags+set}:${extra_warnflags:+set}:"], AS_CASE([ $CFLAGS ], [*" -save-temps="*|*" -save-temps "*], [], [ extra_warnflags="$extra_warnflags -Werror=misleading-indentation" ]) + AS_CASE([$target_os], [mingw*], [ + # 64bit Windows is IL32P64; shorten-64-to-32 causes tons of warnings + extra_warnflags="$extra_warnflags -Wno-shorten-64-to-32" + ], [ + extra_warnflags="$extra_warnflags -Werror=shorten-64-to-32" + ]) # ICC doesn't support -Werror= AS_IF([test $icc_version -gt 0], [ @@ -756,7 +762,6 @@ AS_CASE(["$GCC:${warnflags+set}:${extra_warnflags:+set}:"], -Werror=implicit-function-declaration \ -Werror=implicit-int \ -Werror=pointer-arith \ - -Werror=shorten-64-to-32 \ -Werror=write-strings \ -Werror=old-style-definition \ -Wimplicit-fallthrough=0 \ diff --git a/win32/configure.bat b/win32/configure.bat index 9355caa4d852da..fbe2233d63b8bb 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -1,7 +1,6 @@ @echo off @setlocal EnableExtensions DisableDelayedExpansion || exit /b -1 set PROMPT=$E[94m+$E[m$S -set witharg= if "%~dp0" == "%CD%\" ( echo don't run in win32 directory. 
@@ -19,180 +18,128 @@ call set "WIN32DIR=%%WIN32DIR:%~x0:/:=:/:%%" call set "WIN32DIR=%%WIN32DIR:/%~n0:/:=:/:%%" set "WIN32DIR=%WIN32DIR:~0,-3%" -set XINCFLAGS= -set XLDFLAGS= +set configure=%~0 +set XINCFLAGS= +set XLDFLAGS= set pathlist= set config_make=confargs~%RANDOM%.mak -set confargs=%config_make:.mak=.c% +set confargs=%config_make:.mak=.sub% +set debug_configure= echo>%config_make% # CONFIGURE -( - echo #define $ $$ // - echo !ifndef CONFIGURE_ARGS - echo #define CONFIGURE_ARGS \ -) >%confargs% +type nul > %confargs% :loop -set opt=%1 -if "%1" == "" goto :end -if "%1" == "--debug-configure" (echo on & shift & goto :loop) -if "%1" == "--no-debug-configure" (echo off & shift & goto :loop) -if "%1" == "--prefix" goto :prefix -if "%1" == "--srcdir" goto :srcdir -if "%1" == "srcdir" goto :srcdir -if "%1" == "--target" goto :target -if "%1" == "target" goto :target -if "%1" == "--with-static-linked-ext" goto :extstatic -if "%1" == "--program-prefix" goto :pprefix -if "%1" == "--program-suffix" goto :suffix -if "%1" == "--program-transform-name" goto :transform_name -if "%1" == "--program-name" goto :installname -if "%1" == "--install-name" goto :installname -if "%1" == "--so-name" goto :soname -if "%1" == "--enable-install-doc" goto :enable-rdoc -if "%1" == "--disable-install-doc" goto :disable-rdoc -if "%1" == "--enable-install-static-library" goto :enable-lib -if "%1" == "--disable-install-static-library" goto :disable-lib -if "%1" == "--enable-debug-env" goto :enable-debug-env -if "%1" == "--disable-debug-env" goto :disable-debug-env -if "%1" == "--enable-devel" goto :enable-devel -if "%1" == "--disable-devel" goto :disable-devel -if "%1" == "--enable-rubygems" goto :enable-rubygems -if "%1" == "--disable-rubygems" goto :disable-rubygems -if "%1" == "--extout" goto :extout -if "%1" == "--path" goto :path -if "%1" == "--with-baseruby" goto :baseruby -if "%1" == "--without-baseruby" goto :nobaseruby -if "%1" == "--with-ntver" goto :ntver -if "%1" == 
"--with-libdir" goto :libdir -if "%1" == "--with-git" goto :git -if "%1" == "--without-git" goto :nogit -if "%1" == "--without-ext" goto :witharg -if "%1" == "--without-extensions" goto :witharg -if "%1" == "--with-opt-dir" goto :opt-dir -if "%1" == "--with-gmp" goto :gmp -if "%1" == "--with-gmp-dir" goto :gmp-dir -if "%opt:~0,10%" == "--without-" goto :withoutarg -if "%opt:~0,7%" == "--with-" goto :witharg -if "%1" == "-h" goto :help -if "%1" == "--help" goto :help - if "%opt:~0,1%" == "-" ( - echo>>%confargs% %1 \ - set witharg= - ) else if "%witharg%" == "" ( - echo>>%confargs% %1 \ - ) else ( - echo>>%confargs% ,%1\ +if [%1] == [] goto :end ; +if "%~1" == "" (shift & goto :loop) +for /f "delims== tokens=1,*" %%I in ("%~1") do ((set "opt=%%I") && (set "arg=%%J")) + set "eq==" + if "%arg%" == "" if not "%~1" == "%opt%=%arg%" (set "eq=") + shift + if "%opt%" == "--debug-configure" ( + echo on + set "debug_configure=yes" + goto :loop ; ) - shift -goto :loop ; -:srcdir - echo>> %config_make% srcdir = %~2 - echo>>%confargs% --srcdir=%2 \ - shift - shift -goto :loop ; -:prefix - echo>> %config_make% prefix = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:pprefix - echo>> %config_make% PROGRAM_PREFIX = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:suffix - echo>> %config_make% PROGRAM_SUFFIX = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:installname - echo>> %config_make% RUBY_INSTALL_NAME = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:soname - echo>> %config_make% RUBY_SO_NAME = %~2 - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; -:transform_name - - shift - shift -goto :loop ; -:target - echo>> %config_make% target = %~2 - echo>>%confargs% --target=%2 \ - if "%~2" == "x64-mswin64" ( - echo>> %config_make% TARGET_OS = mswin64 + if "%opt%" == "--no-debug-configure" ( + echo off + set "debug_configure=" + goto :loop ; ) - shift - shift -goto :loop ; -:extstatic - echo>> %config_make% EXTSTATIC 
= static - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-rdoc - echo>> %config_make% RDOCTARGET = rdoc - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-rdoc - echo>> %config_make% RDOCTARGET = nodoc - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-lib - echo>> %config_make% INSTALL_STATIC_LIBRARY = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-lib - echo>> %config_make% INSTALL_STATIC_LIBRARY = no - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-debug-env - echo>> %config_make% ENABLE_DEBUG_ENV = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-debug-env - echo>> %config_make% ENABLE_DEBUG_ENV = no - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-devel - echo>> %config_make% RUBY_DEVEL = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-devel - echo>> %config_make% RUBY_DEVEL = no - echo>>%confargs% %1 \ - shift -goto :loop ; -:enable-rubygems - echo>> %config_make% USE_RUBYGEMS = yes - echo>>%confargs% %1 \ - shift -goto :loop ; -:disable-rubygems - echo>> %config_make% USE_RUBYGEMS = no - echo>>%confargs% %1 \ - shift + if "%opt%" == "--prefix" goto :dir + if "%opt%" == "srcdir" set "opt=--srcdir" + if "%opt%" == "--srcdir" goto :dir + if "%opt%" == "--target" goto :target + if "%opt%" == "target" goto :target + if "%opt:~0,10%" == "--program-" goto :program_name + if "%opt%" == "--install-name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%opt%" == "--so-name" (set "var=RUBY_SO_NAME" & goto :name) + if "%opt%" == "--extout" goto :extout + if "%opt%" == "--path" goto :path + if "%opt:~0,9%" == "--enable-" (set "enable=yes" & goto :enable) + if "%opt:~0,10%" == "--disable-" (set "enable=no" & goto :enable) + if "%opt:~0,10%" == "--without-" goto :withoutarg + if "%opt:~0,7%" == "--with-" goto :witharg + if "%opt%" == "-h" goto :help + if "%opt%" == "--help" goto :help + if "%opt:~0,1%" == "-" ( + goto :unknown_opt + ) + if "%eq%" == "=" ( + set "var=%opt%" + goto :name + ) + set "eq==" +:target 
+ if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% target = %arg% + echo>>%confargs% "--target=%arg:$=$$%" \ + if "%arg%" == "x64-mswin64" ( + echo>> %config_make% TARGET_OS = mswin64 + ) +goto :loop +:program_name + rem A separately given value is consumed at :name, not here. + for /f "delims=- tokens=1,*" %%I in ("%opt%") do set "var=%%J" + if "%var%" == "prefix" (set "var=PROGRAM_PREFIX" & goto :name) + if "%var%" == "suffix" (set "var=PROGRAM_SUFFIX" & goto :name) + if "%var%" == "name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%var%" == "transform-name" ( + echo.1>&2 %configure%: --program-transform-name option is not supported + exit /b 1 + ) +goto :unknown_opt +:name + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %var% = %arg% + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:dir + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %opt:~2% = %arg:\=/% + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:enable + echo>>%confargs% "%opt%" \ + if %enable% == yes (set "opt=%opt:~9%") else (set "opt=%opt:~10%") + if "%opt%" == "install-doc" ( + echo>> %config_make% RDOCTARGET = %enable:yes=r%doc + ) + if "%opt%" == "install-static-library" ( + echo>> %config_make% INSTALL_STATIC_LIBRARY = %enable% + ) + if "%opt%" == "debug-env" ( + echo>> %config_make% ENABLE_DEBUG_ENV = %enable% + ) + if "%opt%" == "devel" ( + echo>> %config_make% RUBY_DEVEL = %enable% + ) + if "%opt%" == "rubygems" ( + echo>> %config_make% USE_RUBYGEMS = %enable% + ) +goto :loop ; +:withoutarg + echo>>%confargs% "%opt%" \ + if "%opt%" == "--without-baseruby" goto :nobaseruby + if "%opt%" == "--without-git" goto :nogit + if "%opt%" == "--without-ext" goto :witharg + if "%opt%" == "--without-extensions" goto :witharg +goto :loop ; +:witharg + if "%opt%" == "--with-static-linked-ext" goto :extstatic + if "%eq%" == "" (set "arg=%~1" & shift) + echo>>%confargs% "%opt%=%arg:$=$$%" \ + if "%opt%" == "--with-baseruby" goto :baseruby + if 
"%opt%" == "--with-ntver" goto
:ntver + if "%opt%" == "--with-libdir" goto :libdir + if "%opt%" == "--with-git" goto :git + if "%opt%" == "--with-opt-dir" goto :opt-dir + if "%opt%" == "--with-gmp-dir" goto :opt-dir + if "%opt%" == "--with-gmp" goto :gmp + if "%opt%" == "--with-destdir" goto :destdir goto :loop ; :ntver ::- For version constants, see ::- https://learn.microsoft.com/en-us/cpp/porting/modifying-winver-and-win32-winnt#remarks - set NTVER=%~2 + if "%eq%" == "" (set "NTVER=%~1" & shift) else (set "NTVER=%arg%") if /i not "%NTVER:~0,2%" == "0x" if /i not "%NTVER:~0,13%" == "_WIN32_WINNT_" ( for %%i in (A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) do ( call set NTVER=%%NTVER:%%i=%%i%% @@ -200,62 +147,53 @@ goto :loop ; call set NTVER=_WIN32_WINNT_%%NTVER%% ) echo>> %config_make% NTVER = %NTVER% - echo>>%confargs% %1=%2 \ - shift - shift + echo>>%confargs% "%opt%=%arg:$=$$%" \ goto :loop ; :extout - if not "%~2" == ".ext" (echo>> %config_make% EXTOUT = %~2) - echo>>%confargs% %1=%2 \ - shift - shift + if "%eq%" == "" (set "arg=%~1" & shift) + if not "%arg%" == ".ext" (echo>> %config_make% EXTOUT = %arg%) + echo>>%confargs% "%opt%=%arg:$=$$%" \ goto :loop ; :path - set pathlist=%pathlist%%~2; - echo>>%confargs% %1=%2 \ - shift - shift -goto :loop ; + if "%eq%" == "" (set "arg=%~1" & shift) + set pathlist=%pathlist%%arg%; + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:extstatic + if "%eq%" == "" (set "arg=static" & shift) + echo>> %config_make% EXTSTATIC = %arg% + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; :baseruby echo>> %config_make% HAVE_BASERUBY = yes - echo>> %config_make% BASERUBY = %~2 - echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% BASERUBY = %arg% goto :loop ; :nobaseruby echo>> %config_make% HAVE_BASERUBY = no echo>> %config_make% BASERUBY = - echo>>%confargs% %1 \ - shift goto :loop ; :libdir - echo>> %config_make% libdir_basename = %~2 - echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% libdir_basename = %arg% goto 
:loop ; :git - echo>> %config_make% GIT = %~2 - echo>>%confargs% %1=%2 \ - shift - shift + echo>> %config_make% GIT = %arg% goto :loop ; :nogit echo>> %config_make% GIT = never-use echo>> %config_make% HAVE_GIT = no - echo>>%confargs% %1 \ - shift goto :loop ; :gmp echo>> %config_make% WITH_GMP = yes - echo>>%confargs% %1 \ - shift goto :loop ; -:gmp-dir +:destdir + echo>> %config_make% DESTDIR = %arg% +goto :loop ; :opt-dir - set opt=%~2 - for %%I in (%opt:;= %) do ( + if "%arg%" == "" ( + echo 1>&2 %configure%: missing argument for %opt% + exit /b 1 + ) + for %%I in (%arg:;= %) do ( set d=%%I call pushd %%d:/=\%% && ( call set XINCFLAGS=%%XINCFLAGS%% -I%%CD:\=/%%/include @@ -263,20 +201,11 @@ goto :loop ; popd ) ) -:witharg - echo>>%confargs% %1=%2\ - set witharg=1 - shift - shift -goto :loop ; -:withoutarg - echo>>%confargs% %1 \ - shift goto :loop ; :help echo Configuration: echo --help display this help - echo --srcdir=DIR find the sources in DIR [configure dir or `..'] + echo --srcdir=DIR find the sources in DIR [configure dir or '..'] echo Installation directories: echo --prefix=PREFIX install files in PREFIX [/usr] echo System types: @@ -286,24 +215,26 @@ goto :loop ; echo --with-static-linked-ext link external modules statically echo --with-ext="a,b,..." use extensions a, b, ... echo --without-ext="a,b,..." ignore extensions a, b, ... - echo --with-opt-dir="DIR-LIST" add optional headers and libraries directories separated by `;' + echo --with-opt-dir="DIR-LIST" add optional headers and libraries directories separated by ';' echo --disable-install-doc do not install rdoc indexes during install echo --with-ntver=0xXXXX target NT version (shouldn't use with old SDK) echo --with-ntver=_WIN32_WINNT_XXXX echo --with-ntver=XXXX same as --with-ntver=_WIN32_WINNT_XXXX - echo Note that `,' and `;' need to be enclosed within double quotes in batch file command line. + echo Note that '=,;' need to be enclosed within double quotes in batch file command line. 
del %confargs% %config_make% goto :exit +:unknown_opt + ( + echo %configure%: unknown option %opt% + echo Try --help option. + ) 1>&2 + exit /b 1 :end +if "%debug_configure%" == "yes" (type %confargs%) ( - echo // - echo configure_args = CONFIGURE_ARGS - echo !endif - echo #undef $ -) >> %confargs% -( - cl -EP %confargs% 2>nul | findstr "! =" - echo. + echo configure_args = \ + type %confargs% + echo # configure_args if NOT "%XINCFLAGS%" == "" echo XINCFLAGS = %XINCFLAGS% if NOT "%XLDFLAGS%" == "" echo XLDFLAGS = %XLDFLAGS% if NOT "%pathlist%" == "" ( @@ -312,7 +243,8 @@ goto :exit call echo LIB = %%pathlist:;=/lib;%%$^(LIB^) ) ) >> %config_make% -del %confargs% > nul +del %confargs% +if "%debug_configure%" == "yes" (type %config_make%) nmake -al -f %WIN32DIR%/setup.mak "WIN32DIR=%WIN32DIR%" ^ config_make=%config_make% ^ diff --git a/win32/enc-setup.mak b/win32/enc-setup.mak index 44345a2f45d6ed..b012161e3256cc 100644 --- a/win32/enc-setup.mak +++ b/win32/enc-setup.mak @@ -4,7 +4,7 @@ BUILTIN_ENCOBJS BUILTIN_TRANSOBJS: $(srcdir)/enc/Makefile.in !include $(srcdir)/enc/Makefile.in BUILTIN_ENCOBJS: - @echo BUILTIN_ENCOBJS = $(BUILTIN_ENCS:.c=.obj) >> $(MAKEFILE) + @echo>> $(MAKEFILE) BUILTIN_ENCOBJS = $(BUILTIN_ENCS:.c=.obj) BUILTIN_TRANSOBJS: - @echo BUILTIN_TRANSOBJS = $(BUILTIN_TRANSES:.trans=.obj) >> $(MAKEFILE) + @echo>> $(MAKEFILE) BUILTIN_TRANSOBJS = $(BUILTIN_TRANSES:.trans=.obj) diff --git a/win32/setup.mak b/win32/setup.mak index 6fc28ebafbc996..3f14e251cd52e0 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -277,5 +277,5 @@ $(BANG)include $$(srcdir)/win32/Makefile.sub << @$(COMSPEC) /C $(srcdir:/=\)\win32\rm.bat config.h config.status -@move /y $(MAKEFILE_NEW) $(MAKEFILE_BACK) > nul 2> nul - @ren $(MAKEFILE) $(MAKEFILE_NEW) + @move /y $(MAKEFILE) $(MAKEFILE_NEW) > nul @echo type 'nmake' to make ruby. 
From 6cc209600df111549c159ee828c900141ce98f00 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 28 Jan 2026 13:12:02 +0900 Subject: [PATCH 57/77] Win32: Make `optdirs` and `pathlist` macros overridable Also consider paths with space at splitting the `--with-opt-dir` argument. --- win32/configure.bat | 234 +++++++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 113 deletions(-) diff --git a/win32/configure.bat b/win32/configure.bat index fbe2233d63b8bb..4699457c8d1c32 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -19,8 +19,7 @@ call set "WIN32DIR=%%WIN32DIR:/%~n0:/:=:/:%%" set "WIN32DIR=%WIN32DIR:~0,-3%" set configure=%~0 -set XINCFLAGS= -set XLDFLAGS= +set optdirs= set pathlist= set config_make=confargs~%RANDOM%.mak set confargs=%config_make:.mak=.sub% @@ -44,97 +43,97 @@ for /f "delims== tokens=1,*" %%I in ("%~1") do ((set "opt=%%I") && (set "arg=%%J set "debug_configure=" goto :loop ; ) - if "%opt%" == "--prefix" goto :dir - if "%opt%" == "srcdir" set "opt=--srcdir" - if "%opt%" == "--srcdir" goto :dir - if "%opt%" == "--target" goto :target - if "%opt%" == "target" goto :target - if "%opt:~0,10%" == "--program-" goto :program_name - if "%opt%" == "--install-name" (set "var=RUBY_INSTALL_NAME" & goto :name) - if "%opt%" == "--so-name" (set "var=RUBY_SO_NAME" & goto :name) + if "%opt%" == "--prefix" goto :dir + if "%opt%" == "srcdir" set "opt=--srcdir" + if "%opt%" == "--srcdir" goto :dir + if "%opt%" == "--target" goto :target + if "%opt%" == "target" goto :target + if "%opt:~0,10%" == "--program-" goto :program_name + if "%opt%" == "--install-name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%opt%" == "--so-name" (set "var=RUBY_SO_NAME" & goto :name) if "%opt%" == "--extout" goto :extout if "%opt%" == "--path" goto :path - if "%opt:~0,9%" == "--enable-" (set "enable=yes" & goto :enable) - if "%opt:~0,10%" == "--disable-" (set "enable=no" & goto :enable) - if "%opt:~0,10%" == "--without-" goto :withoutarg - if 
"%opt:~0,7%" == "--with-" goto :witharg - if "%opt%" == "-h" goto :help - if "%opt%" == "--help" goto :help - if "%opt:~0,1%" == "-" ( - goto :unknown_opt - ) - if "%eq%" == "=" ( - set "var=%opt%" - goto :name - ) - set "eq==" -:target - if "%eq%" == "" (set "arg=%~1" & shift) - echo>> %config_make% target = %arg% - echo>>%confargs% "--target=%arg:$=$$%" \ - if "%arg%" == "x64-mswin64" ( - echo>> %config_make% TARGET_OS = mswin64 - ) -goto :loop -:program_name - if "%eq%" == "" (set "arg=%~1" & shift) - for /f "delims=- tokens=1,*" %I in ("%opt%") do set "var=%%J" - if "%var%" == "prefix" (set "var=PROGRAM_PREFIX" & goto :name) - if "%var%" == "suffix" (set "var=PROGRAM_SUFFIX" & goto :name) - if "%var%" == "name" (set "var=RUBY_INSTALL_NAME" & goto :name) - if "%var%" == "transform-name" ( - echo.1>&2 %configure%: --program-transform-name option is not supported - exit /b 1 - ) -goto :unknown_opt -:name - if "%eq%" == "" (set "arg=%~1" & shift) - echo>> %config_make% %var% = %arg% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; -:dir - if "%eq%" == "" (set "arg=%~1" & shift) - echo>> %config_make% %opt:~2% = %arg:\=/% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; -:enable - echo>>%confargs% "%opt%" \ - if %enable% == yes (set "opt=%opt:~9%") else (set "opt=%opt:~10%") - if "%opt%" == "rdoc" ( - echo>> %config_make% RDOCTARGET = %enable:yes=r%doc - ) - if "%opt%" == "install-static-library" ( - echo>> %config_make% INSTALL_STATIC_LIBRARY = %enable% - ) - if "%opt%" == "debug-env" ( - echo>> %config_make% ENABLE_DEBUG_ENV = %enable% - ) - if "%opt%" == "devel" ( - echo>> %config_make% RUBY_DEVEL = %enable% - ) - if "%opt%" == "rubygems" ( - echo>> %config_make% USE_RUBYGEMS = %enable% - ) -goto :loop ; -:withoutarg - echo>>%confargs% "%opt%" \ + if "%opt:~0,9%" == "--enable-" (set "enable=yes" & goto :enable) + if "%opt:~0,10%" == "--disable-" (set "enable=no" & goto :enable) + if "%opt:~0,10%" == "--without-" goto :withoutarg + if "%opt:~0,7%" == 
"--with-" goto :witharg + if "%opt%" == "-h" goto :help + if "%opt%" == "--help" goto :help + if "%opt:~0,1%" == "-" ( + goto :unknown_opt + ) + if "%eq%" == "=" ( + set "var=%opt%" + goto :name + ) + set "eq==" +:target + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% target = %arg% + echo>>%confargs% "--target=%arg:$=$$%" \ + if "%arg%" == "x64-mswin64" ( + echo>> %config_make% TARGET_OS = mswin64 + ) +goto :loop +:program_name + if "%eq%" == "" (set "arg=%~1" & shift) + for /f "delims=- tokens=1,*" %I in ("%opt%") do set "var=%%J" + if "%var%" == "prefix" (set "var=PROGRAM_PREFIX" & goto :name) + if "%var%" == "suffix" (set "var=PROGRAM_SUFFIX" & goto :name) + if "%var%" == "name" (set "var=RUBY_INSTALL_NAME" & goto :name) + if "%var%" == "transform-name" ( + echo.1>&2 %configure%: --program-transform-name option is not supported + exit /b 1 + ) +goto :unknown_opt +:name + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %var% = %arg% + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:dir + if "%eq%" == "" (set "arg=%~1" & shift) + echo>> %config_make% %opt:~2% = %arg:\=/% + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:enable + echo>>%confargs% "%opt%" \ + if %enable% == yes (set "opt=%opt:~9%") else (set "opt=%opt:~10%") + if "%opt%" == "rdoc" ( + echo>> %config_make% RDOCTARGET = %enable:yes=r%doc + ) + if "%opt%" == "install-static-library" ( + echo>> %config_make% INSTALL_STATIC_LIBRARY = %enable% + ) + if "%opt%" == "debug-env" ( + echo>> %config_make% ENABLE_DEBUG_ENV = %enable% + ) + if "%opt%" == "devel" ( + echo>> %config_make% RUBY_DEVEL = %enable% + ) + if "%opt%" == "rubygems" ( + echo>> %config_make% USE_RUBYGEMS = %enable% + ) +goto :loop ; +:withoutarg + echo>>%confargs% "%opt%" \ if "%opt%" == "--without-baseruby" goto :nobaseruby if "%opt%" == "--without-git" goto :nogit if "%opt%" == "--without-ext" goto :witharg if "%opt%" == "--without-extensions" goto :witharg -goto :loop ; -:witharg - if "%opt%" 
== "--with-static-linked-ext" goto :extstatic - if "%eq%" == "" (set "arg=%~1" & shift) - echo>>%confargs% "%opt%=%arg:$=$$%" \ - if "%opt%" == "--with-baseruby" goto :baseruby - if "%opt%" == "--with-ntver" goto :ntver - if "%opt%" == "--with-libdir" goto :libdir - if "%opt%" == "--with-git" goto :git +goto :loop ; +:witharg + if "%opt%" == "--with-static-linked-ext" goto :extstatic + if "%eq%" == "" (set "arg=%~1" & shift) + echo>>%confargs% "%opt%=%arg:$=$$%" \ + if "%opt%" == "--with-baseruby" goto :baseruby + if "%opt%" == "--with-ntver" goto :ntver + if "%opt%" == "--with-libdir" goto :libdir + if "%opt%" == "--with-git" goto :git if "%opt%" == "--with-opt-dir" goto :opt-dir - if "%opt%" == "--with-gmp-dir" goto :opt-dir + if "%opt%" == "--with-gmp-dir" goto :opt-dir if "%opt%" == "--with-gmp" goto :gmp - if "%opt%" == "--with-destdir" goto :destdir + if "%opt%" == "--with-destdir" goto :destdir goto :loop ; :ntver ::- For version constants, see @@ -156,14 +155,14 @@ goto :loop ; goto :loop ; :path if "%eq%" == "" (set "arg=%~1" & shift) - set pathlist=%pathlist%%arg%; + set "pathlist=%pathlist%%arg:\=/%;" + echo>>%confargs% "%opt%=%arg:$=$$%" \ +goto :loop ; +:extstatic + if "%eq%" == "" (set "arg=static" & shift) + echo>> %config_make% EXTSTATIC = %arg% echo>>%confargs% "%opt%=%arg:$=$$%" \ goto :loop ; -:extstatic - if "%eq%" == "" (set "arg=static" & shift) - echo>> %config_make% EXTSTATIC = %arg% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; :baseruby echo>> %config_make% HAVE_BASERUBY = yes echo>> %config_make% BASERUBY = %arg% @@ -185,22 +184,21 @@ goto :loop ; :gmp echo>> %config_make% WITH_GMP = yes goto :loop ; -:destdir - echo>> %config_make% DESTDIR = %arg% -goto :loop ; +:destdir + echo>> %config_make% DESTDIR = %arg% +goto :loop ; :opt-dir - if "%arg%" == "" ( - echo 1>&2 %configure%: missing argument for %opt% - exit /b 1 - ) - for %%I in (%arg:;= %) do ( - set d=%%I - call pushd %%d:/=\%% && ( - call set XINCFLAGS=%%XINCFLAGS%% 
-I%%CD:\=/%%/include - call set XLDFLAGS=%%XLDFLAGS%% -libpath:%%CD:\=/%%/lib + if "%arg%" == "" ( + echo 1>&2 %configure%: missing argument for %opt% + exit /b 1 + ) + :optdir-loop + for /f "delims=; tokens=1,*" %%I in ("%arg%") do (set "d=%%I" & set "arg=%%J") + pushd %d:/=\% && ( + set "optdirs=%optdirs%;%CD:\=/%" popd ) - ) + if not "%arg%" == "" goto :optdir-loop goto :loop ; :help echo Configuration: @@ -223,26 +221,36 @@ goto :loop ; echo Note that '=,;' need to be enclosed within double quotes in batch file command line. del %confargs% %config_make% goto :exit -:unknown_opt - ( - echo %configure%: unknown option %opt% - echo Try --help option. - ) 1>&2 - exit /b 1 +:unknown_opt + ( + echo %configure%: unknown option %opt% + echo Try --help option. + ) 1>&2 + exit /b 1 :end if "%debug_configure%" == "yes" (type %confargs%) +if not "%optdirs%" == "" (echo>>%config_make% optdirs = %optdirs:~1%) ( + echo. echo configure_args = \ type %confargs% echo # configure_args - if NOT "%XINCFLAGS%" == "" echo XINCFLAGS = %XINCFLAGS% - if NOT "%XLDFLAGS%" == "" echo XLDFLAGS = %XLDFLAGS% - if NOT "%pathlist%" == "" ( + + echo. + echo !if "$(optdirs)" != "" + for %%I in ("$(optdirs:\=/)" "$(optdirs:/;=;)") do @echo optdirs = %%~I + echo XINCFLAGS = -I"$(optdirs:;=/include" -I")/include" + echo XLDFLAGS = -libpath:"$(optdirs:;=/lib" -libpath:")/lib" + echo !endif + + if not "%pathlist%" == "" ( + echo. call echo PATH = %%pathlist:;=/bin;%%$^(PATH^) call echo INCLUDE = %%pathlist:;=/include;%%$^(INCLUDE^) call echo LIB = %%pathlist:;=/lib;%%$^(LIB^) ) ) >> %config_make% + del %confargs% if "%debug_configure%" == "yes" (type %config_make%) From b5ecfd1eba928d577d100fc3377375b55bf3cbb1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 28 Jan 2026 15:53:15 +0900 Subject: [PATCH 58/77] Win32: Refactor target detection Previously, Visual C++ had only one toolchain for the x86 family, and the only option was to select the target processor level. 
In recent versions, there are multiple toolchains with the same command name for each host/target platform combination, so it is no longer possible to select the target with a command-line option. Also, configure.bat assumes that the toolchain has been configured before it is executed, so selecting it from this batch file is meaningless. Therefore, the only possible check is whether the specified target and compiler match. --- win32/configure.bat | 24 +++++++++------ win32/setup.mak | 75 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 71 insertions(+), 28 deletions(-) diff --git a/win32/configure.bat b/win32/configure.bat index 4699457c8d1c32..94eed7bc128b72 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -66,15 +66,18 @@ for /f "delims== tokens=1,*" %%I in ("%~1") do ((set "opt=%%I") && (set "arg=%%J set "var=%opt%" goto :name ) - set "eq==" + set "target=%opt%" + echo>>%confargs% "--target=%opt:$=$$%" \ +goto :loop ; :target if "%eq%" == "" (set "arg=%~1" & shift) - echo>> %config_make% target = %arg% - echo>>%confargs% "--target=%arg:$=$$%" \ - if "%arg%" == "x64-mswin64" ( - echo>> %config_make% TARGET_OS = mswin64 + if "%arg%" == "" ( + echo 1>&2 %configure%: missing argument for %opt% + exit /b 1 ) -goto :loop + set "target=%arg%" + echo>>%confargs% "--target=%arg:$=$$%" \ +goto :loop ; :program_name if "%eq%" == "" (set "arg=%~1" & shift) for /f "delims=- tokens=1,*" %I in ("%opt%") do set "var=%%J" @@ -220,7 +223,7 @@ goto :loop ; echo --with-ntver=XXXX same as --with-ntver=_WIN32_WINNT_XXXX echo Note that '=,;' need to be enclosed within double quotes in batch file command line. 
del %confargs% %config_make% -goto :exit +goto :EOF :unknown_opt ( echo %configure%: unknown option %opt% @@ -256,6 +259,7 @@ if "%debug_configure%" == "yes" (type %config_make%) nmake -al -f %WIN32DIR%/setup.mak "WIN32DIR=%WIN32DIR%" ^ config_make=%config_make% ^ - MAKEFILE=Makefile.new MAKEFILE_BACK=Makefile.old MAKEFILE_NEW=Makefile -:exit -@endlocal + MAKEFILE=Makefile.new MAKEFILE_BACK=Makefile.old MAKEFILE_NEW=Makefile ^ + %target% +set error=%ERRORLEVEL% +if exist %config_make% del /q %config_make% diff --git a/win32/setup.mak b/win32/setup.mak index 3f14e251cd52e0..19ace3445c2568 100644 --- a/win32/setup.mak +++ b/win32/setup.mak @@ -22,7 +22,7 @@ MAKE = $(MAKE) -f $(MAKEFILE) MAKEFILE = Makefile !endif CPU = PROCESSOR_LEVEL -CC = $(CC) -nologo -source-charset:utf-8 +CC = $(CC) -nologo CPP = $(CC) -EP !if "$(HAVE_BASERUBY)" != "no" && "$(BASERUBY)" == "" BASERUBY = ruby @@ -35,13 +35,14 @@ i586-mswin32: -prologue- -i586- -epilogue- i686-mswin32: -prologue- -i686- -epilogue- alpha-mswin32: -prologue- -alpha- -epilogue- x64-mswin64: -prologue- -x64- -epilogue- +arm64-mswin64: -prologue- -arm64- -epilogue- -prologue-: -basic-vars- -baseruby- -gmp- -generic-: -osname- -basic-vars-: nul @rem <<$(MAKEFILE) -### Makefile for ruby $(TARGET_OS) ### +### Makefile for ruby ### MAKE = nmake srcdir = $(srcdir:\=/) prefix = $(prefix:\=/) @@ -70,20 +71,31 @@ int main(void) {mpz_init(x); return 0;} @echo # TARGET>>$(MAKEFILE) -osname32-: -osname-section- - @echo TARGET_OS = mswin32>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if target OS is 32bit" >>$(MAKEFILE) +#ifdef _WIN64 +#error +#else +TARGET_OS = mswin32 +#endif +<< -osname64-: -osname-section- - @echo TARGET_OS = mswin64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if target OS is 64bit" >>$(MAKEFILE) +#ifndef _WIN64 +#error +#else +TARGET_OS = mswin64 +#endif +<< -osname-: -osname-section- - @echo !ifndef TARGET_OS>>$(MAKEFILE) - @($(CC) -c < nul && (echo TARGET_OS = mswin32) || (echo TARGET_OS = mswin64)) >>$(MAKEFILE) + 
@$(CPP) -Tc <<"checking for target OS" 2>nul | findstr = >>$(MAKEFILE) #ifdef _WIN64 -#error +TARGET_OS = mswin64 +#else +TARGET_OS = mswin32 #endif << - @echo !endif>>$(MAKEFILE) - @$(WIN32DIR:/=\)\rm.bat conftest.* -compiler-: -compiler-section- -version- -runtime- -headers- @@ -211,27 +223,54 @@ del %0 & exit << -generic-: nul - @$(CPP) <nul | findstr = >>$(MAKEFILE) + @$(CPP) -Tc <nul | findstr = >>$(MAKEFILE) #if defined _M_ARM64 MACHINE = arm64 #elif defined _M_X64 MACHINE = x64 #else MACHINE = x86 -#endif -<< !if defined($(CPU)) - @echo>>$(MAKEFILE) $(CPU) = $(PROCESSOR_LEVEL) +$(CPU) = $(PROCESSOR_LEVEL) !endif +#endif -alpha-: -osname32- - @echo MACHINE = alpha>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_ALPHA +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< + -x64-: -osname64- - @echo MACHINE = x64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_AMD64 +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< + -ix86-: -osname32- - @echo MACHINE = x86>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_IX86 +#error Not compiler for $(@:-=) +#else +#define ix86 x86 +MACHINE = $(@:-=) +#endif +<< + -arm64-: -osname64- - @echo MACHINE = arm64>>$(MAKEFILE) + @$(CPP) -Tc <<"checking if compiler is for $(@:-=)" >>$(MAKEFILE) +#ifndef _M_ARM64 +#error Not compiler for $(@:-=) +#else +MACHINE = $(@:-=) +#endif +<< -i386-: -ix86- @echo $(CPU) = 3>>$(MAKEFILE) @@ -261,7 +300,7 @@ MACHINE = x86 # XLDFLAGS = # RFLAGS = -r # EXTLIBS = -CC = $(CC) +CC = $(CC) -source-charset:utf-8 !if "$(AS)" != "ml64" AS = $(AS) -nologo !endif From 5299276e421c3f11c944069fe5c655628634b9e1 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 28 Jan 2026 18:35:27 +0900 Subject: [PATCH 59/77] win32: Prevent broken substitution string when variable is undefined Avoids an issue where `%undefined:A=B%` expands to a 
literal `A=B` because the parser fails to find the variable before the colon, then parses the following percent as the next variable expansion. Added a definition check to ensure safe expansion. --- win32/configure.bat | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/win32/configure.bat b/win32/configure.bat index 94eed7bc128b72..d4e010ac4b0374 100755 --- a/win32/configure.bat +++ b/win32/configure.bat @@ -66,8 +66,16 @@ for /f "delims== tokens=1,*" %%I in ("%~1") do ((set "opt=%%I") && (set "arg=%%J set "var=%opt%" goto :name ) - set "target=%opt%" - echo>>%confargs% "--target=%opt:$=$$%" \ + set "arg=%opt%" + set "eq==" + set "opt=--target" + set "target=%arg%" +:loopend + if not "%arg%" == "" ( + echo>>%confargs% "%opt%=%arg:$=$$%" \ + ) else ( + echo>>%confargs% "%opt%%eq%" \ + ) goto :loop ; :target if "%eq%" == "" (set "arg=%~1" & shift) @@ -76,6 +84,7 @@ goto :loop ; exit /b 1 ) set "target=%arg%" + set "opt=--target" echo>>%confargs% "--target=%arg:$=$$%" \ goto :loop ; :program_name @@ -92,13 +101,11 @@ goto :unknown_opt :name if "%eq%" == "" (set "arg=%~1" & shift) echo>> %config_make% %var% = %arg% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :dir if "%eq%" == "" (set "arg=%~1" & shift) echo>> %config_make% %opt:~2% = %arg:\=/% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :enable echo>>%confargs% "%opt%" \ if %enable% == yes (set "opt=%opt:~9%") else (set "opt=%opt:~10%") @@ -128,7 +135,11 @@ goto :loop ; :witharg if "%opt%" == "--with-static-linked-ext" goto :extstatic if "%eq%" == "" (set "arg=%~1" & shift) - echo>>%confargs% "%opt%=%arg:$=$$%" \ + if not "%arg%" == "" ( + echo>>%confargs% "%opt%=%arg:$=$$%" \ + ) else ( + echo>>%confargs% "%opt%%eq%" \ + ) if "%opt%" == "--with-baseruby" goto :baseruby if "%opt%" == "--with-ntver" goto :ntver if "%opt%" == "--with-libdir" goto :libdir @@ -149,23 +160,19 @@ goto :loop ; call set 
NTVER=_WIN32_WINNT_%%NTVER%% ) echo>> %config_make% NTVER = %NTVER% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :extout if "%eq%" == "" (set "arg=%~1" & shift) if not "%arg%" == ".ext" (echo>> %config_make% EXTOUT = %arg%) - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :path if "%eq%" == "" (set "arg=%~1" & shift) set "pathlist=%pathlist%%arg:\=/%;" - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :extstatic if "%eq%" == "" (set "arg=static" & shift) echo>> %config_make% EXTSTATIC = %arg% - echo>>%confargs% "%opt%=%arg:$=$$%" \ -goto :loop ; +goto :loopend ; :baseruby echo>> %config_make% HAVE_BASERUBY = yes echo>> %config_make% BASERUBY = %arg% From fbff0c936cc8b1a1c5f6fe51ea6e892721c200f1 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Thu, 29 Jan 2026 13:00:13 -0500 Subject: [PATCH 60/77] ZJIT: Handle `nil` case for `getblockparamproxy` (#15986) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves https://github.com/Shopify/ruby/issues/772 Adds profiling for the `getblockparamproxy` YARV instruction and handles the `nil` block case by pushing `nil` instead of the block proxy object. This improves `ratio_in_zjit` slightly (by 0.1 percentage points, from 82.1% to 82.2% on Lobsters). Profiling data for `getblockparamproxy` on Lobsters: ``` Top-6 getblockparamproxy handler (100.0% of total 3,353,291): polymorphic: 2,337,372 (69.7%) nil: 552,629 (16.5%) iseq: 259,636 ( 7.7%) no_profiles: 156,734 ( 4.7%) proc: 40,223 ( 1.2%) megamorphic: 6,697 ( 0.2%) ``` Lobsters benchmark stats:
Stats before (master):

``` ❯ ./run_benchmarks.rb --chruby 'ruby-zjit --zjit-stats' lobsters ***ZJIT: Printing ZJIT statistics on exit*** ... Top-20 side exit reasons (100.0% of total 15,338,024): guard_type_failure: 6,889,050 (44.9%) guard_shape_failure: 6,848,898 (44.7%) block_param_proxy_not_iseq_or_ifunc: 1,008,525 ( 6.6%) unhandled_hir_insn: 236,977 ( 1.5%) compile_error: 191,763 ( 1.3%) fixnum_mult_overflow: 50,739 ( 0.3%) block_param_proxy_modified: 28,119 ( 0.2%) patchpoint_stable_constant_names: 18,229 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) unhandled_block_arg: 13,782 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) unhandled_yarv_insn: 7,540 ( 0.0%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,207 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) obj_to_string_fallback: 412 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 82 ( 0.0%) ... ratio_in_zjit: 82.1% ```

Stats after:

``` ❯ ./run_benchmarks.rb --chruby 'ruby-zjit --zjit-stats' lobsters ***ZJIT: Printing ZJIT statistics on exit*** ... Top-20 side exit reasons (100.0% of total 15,061,422): guard_type_failure: 6,892,934 (45.8%) guard_shape_failure: 6,850,512 (45.5%) block_param_proxy_not_iseq_or_ifunc: 549,823 ( 3.7%) unhandled_hir_insn: 236,979 ( 1.6%) compile_error: 191,782 ( 1.3%) unhandled_yarv_insn: 128,695 ( 0.9%) block_param_proxy_not_nil: 68,623 ( 0.5%) fixnum_mult_overflow: 50,739 ( 0.3%) patchpoint_stable_constant_names: 18,568 ( 0.1%) unhandled_newarray_send_pack: 14,481 ( 0.1%) block_param_proxy_modified: 13,819 ( 0.1%) unhandled_block_arg: 13,798 ( 0.1%) fixnum_lshift_overflow: 10,085 ( 0.1%) patchpoint_no_ep_escape: 7,815 ( 0.1%) expandarray_failure: 4,533 ( 0.0%) guard_super_method_entry: 4,475 ( 0.0%) patchpoint_method_redefined: 1,207 ( 0.0%) obj_to_string_fallback: 1,140 ( 0.0%) patchpoint_no_singleton_class: 1,130 ( 0.0%) guard_less_failure: 163 ( 0.0%) ... ratio_in_zjit: 82.2% ```

--- insns.def | 1 + vm_insnhelper.c | 2 +- yjit/src/cruby_bindings.inc.rs | 65 +++++++++--------- zjit.c | 1 + zjit.rb | 1 + zjit/bindgen/src/main.rs | 1 + zjit/src/codegen.rs | 50 ++++++++------ zjit/src/cruby.rs | 2 + zjit/src/cruby_bindings.inc.rs | 66 +++++++++--------- zjit/src/hir.rs | 121 +++++++++++++++++++++++++++++---- zjit/src/hir/opt_tests.rs | 93 ++++++++++++++++++++++--- zjit/src/hir/tests.rs | 40 ++++++----- zjit/src/profile.rs | 17 +++++ zjit/src/stats.rs | 11 +++ 14 files changed, 344 insertions(+), 127 deletions(-) diff --git a/insns.def b/insns.def index ceeaf4128e9abf..f9a334d824b31a 100644 --- a/insns.def +++ b/insns.def @@ -145,6 +145,7 @@ getblockparamproxy (lindex_t idx, rb_num_t level) () (VALUE val) +// attr bool zjit_profile = true; { const VALUE *ep = vm_get_ep(GET_EP(), level); VM_ASSERT(VM_ENV_LOCAL_P(ep)); diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 9cb163e97f2d36..a27bf5f49be69c 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -6050,7 +6050,7 @@ vm_define_method(const rb_execution_context_t *ec, VALUE obj, ID id, VALUE iseqv // * If it's VM_BLOCK_HANDLER_NONE, return nil // * If it's an ISEQ or an IFUNC, fetch it from its rb_captured_block // * If it's a PROC or SYMBOL, return it as is -static VALUE +VALUE rb_vm_untag_block_handler(VALUE block_handler) { if (VM_BLOCK_HANDLER_NONE == block_handler) return Qnil; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 56994388a3a4a1..9216802a3c1919 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -917,38 +917,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 
219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 
220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit.c b/zjit.c index 
9560d88130b03f..0c463334cde42a 100644 --- a/zjit.c +++ b/zjit.c @@ -300,6 +300,7 @@ rb_zjit_class_has_default_allocator(VALUE klass) } +VALUE rb_vm_untag_block_handler(VALUE block_handler); VALUE rb_vm_get_untagged_block_handler(rb_control_frame_t *reg_cfp); void diff --git a/zjit.rb b/zjit.rb index 0bd6c1b96d36d0..0cc9ca8261a274 100644 --- a/zjit.rb +++ b/zjit.rb @@ -191,6 +191,7 @@ def stats_string print_counters_with_prefix(prefix: 'getivar_fallback_', prompt: 'getivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 'definedivar_fallback_', prompt: 'definedivar fallback reasons', buf:, stats:, limit: 5) print_counters_with_prefix(prefix: 'invokeblock_handler_', prompt: 'invokeblock handler', buf:, stats:, limit: 10) + print_counters_with_prefix(prefix: 'getblockparamproxy_handler_', prompt: 'getblockparamproxy handler', buf:, stats:, limit: 10) # Show most popular unsupported call features. Because each call can # use multiple complex features, a decrease in this number does not diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 794293d1d321c7..d71e75c444af69 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -409,6 +409,7 @@ fn main() { .allowlist_function("rb_str_neq_internal") .allowlist_function("rb_yarv_ary_entry_internal") .allowlist_function("rb_vm_get_untagged_block_handler") + .allowlist_function("rb_vm_untag_block_handler") .allowlist_function("rb_FL_TEST") .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5d6060dd49d3a3..9276d0af6b81e7 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -529,7 +529,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, 
opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), - &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), + &Insn::GuardAnyBitSet { val, mask, reason, state } => gen_guard_any_bit_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), + &Insn::GuardNoBitsSet { val, mask, reason, state } => gen_guard_no_bits_set(jit, asm, opnd!(val), mask, reason, &function.frame_state(state)), Insn::GuardNotFrozen { recv, state } => gen_guard_not_frozen(jit, asm, opnd!(recv), &function.frame_state(*state)), Insn::GuardNotShared { recv, state } => gen_guard_not_shared(jit, asm, opnd!(recv), &function.frame_state(*state)), &Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), @@ -580,6 +581,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardShape { val, shape, state } => gen_guard_shape(jit, asm, opnd!(val), shape, &function.frame_state(state)), Insn::LoadPC => gen_load_pc(asm), Insn::LoadEC => gen_load_ec(), + &Insn::GetEP { level } => gen_get_ep(asm, level), Insn::GetLEP => gen_get_lep(jit, asm), Insn::LoadSelf => gen_load_self(), &Insn::LoadField { recv, id, offset, return_type } => gen_load_field(asm, opnd!(recv), id, offset, return_type), @@ -786,26 +788,6 @@ fn gen_getblockparam(jit: &mut JITState, asm: &mut Assembler, ep_offset: u32, le asm.load(Opnd::mem(VALUE_BITS, ep, offset)) } -fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { - // Bail out if the `&block` local variable has been modified - let ep = gen_get_ep(asm, level); - let flags = Opnd::mem(64, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); - asm.test(flags, 
VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()); - asm.jnz(side_exit(jit, state, SideExitReason::BlockParamProxyModified)); - - // This handles two cases which are nearly identical - // Block handler is a tagged pointer. Look at the tag. - // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 - // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 - // So to check for either of those cases we can use: val & 0x1 == 0x1 - const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); - - // Bail ouf if the block handler is neither ISEQ nor ifunc - let block_handler = asm.load(Opnd::mem(64, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); - asm.test(block_handler, 0x1.into()); - asm.jz(side_exit(jit, state, SideExitReason::BlockParamProxyNotIseqOrIfunc)); -} - fn gen_guard_not_frozen(jit: &JITState, asm: &mut Assembler, recv: Opnd, state: &FrameState) -> Opnd { let recv = asm.load(recv); // It's a heap object, so check the frozen flag @@ -2338,6 +2320,32 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, val } +fn mask_to_opnd(mask: crate::hir::Const) -> Option { + match mask { + crate::hir::Const::CUInt8(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt16(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt32(v) => Some(Opnd::UImm(v as u64)), + crate::hir::Const::CUInt64(v) => Some(Opnd::UImm(v)), + _ => None + } +} + +/// Compile a bitmask check with a side exit if none of the masked bits are set +fn gen_guard_any_bit_set(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_any_bit_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jz(side_exit(jit, state, reason)); + val +} + +/// Compile a bitmask check with a side exit if any of the masked bits are set +fn gen_guard_no_bits_set(jit: &mut JITState, asm: 
&mut Assembler, val: lir::Opnd, mask: crate::hir::Const, reason: SideExitReason, state: &FrameState) -> lir::Opnd { + let mask_opnd = mask_to_opnd(mask).unwrap_or_else(|| panic!("gen_guard_no_bits_set: unexpected hir::Const {mask:?}")); + asm.test(val, mask_opnd); + asm.jnz(side_exit(jit, state, reason)); + val +} + /// Generate code that records unoptimized C functions if --zjit-stats is enabled fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) { if get_option!(stats) { diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 51faaab9c24658..94b2a443c8b043 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1396,6 +1396,8 @@ pub(crate) mod ids { name: self_ content: b"self" name: rb_ivar_get_at_no_ractor_check name: _shape_id + name: _env_data_index_flags + name: _env_data_index_specval } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 15533180dad72f..969c5a4c693a73 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1706,38 +1706,39 @@ pub const YARVINSN_trace_setlocal_WC_0: ruby_vminsn_type = 214; pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; -pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_send: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 226; -pub const 
YARVINSN_zjit_opt_plus: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 243; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 244; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 245; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 246; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 247; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 248; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 249; +pub const YARVINSN_zjit_getblockparamproxy: ruby_vminsn_type = 218; +pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_setinstancevariable: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_definedivar: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_invokesuper: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_invokeblock: ruby_vminsn_type = 227; +pub const 
YARVINSN_zjit_opt_plus: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 245; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 246; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 247; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 248; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 249; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 250; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), @@ -2081,6 +2082,7 @@ unsafe extern "C" { pub fn rb_zjit_class_initialized_p(klass: VALUE) -> bool; pub fn rb_zjit_class_get_alloc_func(klass: VALUE) -> rb_alloc_func_t; pub fn rb_zjit_class_has_default_allocator(klass: VALUE) -> bool; + pub fn rb_vm_untag_block_handler(block_handler: VALUE) -> VALUE; pub fn rb_vm_get_untagged_block_handler(reg_cfp: *mut rb_control_frame_t) -> VALUE; pub fn rb_zjit_writebarrier_check_immediate(recv: VALUE, val: VALUE); pub fn rb_iseq_encoded_size(iseq: 
*const rb_iseq_t) -> ::std::os::raw::c_uint; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index b523d8430f3e5e..32519a5b978847 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -506,6 +506,7 @@ pub enum SideExitReason { Interrupt, BlockParamProxyModified, BlockParamProxyNotIseqOrIfunc, + BlockParamProxyNotNil, BlockParamWbRequired, StackOverflow, FixnumModByZero, @@ -855,6 +856,9 @@ pub enum Insn { /// Set a class variable `id` to `val` SetClassVar { id: ID, val: InsnId, ic: *const iseq_inline_cvar_cache_entry, state: InsnId }, + /// Get the EP at the given level from the current CFP. + GetEP { level: u32 }, + /// Get the EP of the ISeq of the containing method, or "local level", skipping over block-level EPs. /// Equivalent of GET_LEP() macro. GetLEP, @@ -1018,11 +1022,12 @@ pub enum Insn { GuardTypeNot { val: InsnId, guard_type: Type, state: InsnId }, /// Side-exit if val is not the expected Const. GuardBitEquals { val: InsnId, expected: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) == 0 + GuardAnyBitSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, + /// Side-exit if (val & mask) != 0 + GuardNoBitsSet { val: InsnId, mask: Const, reason: SideExitReason, state: InsnId }, /// Side-exit if val doesn't have the expected shape. GuardShape { val: InsnId, shape: ShapeId, state: InsnId }, - /// Side-exit if the block param has been modified or the block handler for the frame - /// is neither ISEQ nor ifunc, which makes it incompatible with rb_block_param_proxy. - GuardBlockParamProxy { level: u32, state: InsnId }, /// Side-exit if val is frozen. Does *not* check if the val is an immediate; assumes that it is /// a heap object. GuardNotFrozen { recv: InsnId, state: InsnId }, @@ -1066,7 +1071,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. 
} | Insn::Throw { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => false, _ => true, @@ -1154,6 +1159,7 @@ impl Insn { Insn::DefinedIvar { .. } => effects::Any, Insn::LoadPC { .. } => Effect::read_write(abstract_heaps::PC, abstract_heaps::Empty), Insn::LoadEC { .. } => effects::Empty, + Insn::GetEP { .. } => effects::Empty, Insn::GetLEP { .. } => effects::Empty, Insn::LoadSelf { .. } => Effect::read_write(abstract_heaps::Frame, abstract_heaps::Empty), Insn::LoadField { .. } => Effect::read_write(abstract_heaps::Other, abstract_heaps::Empty), @@ -1220,8 +1226,9 @@ impl Insn { Insn::GuardType { .. } => effects::Any, Insn::GuardTypeNot { .. } => effects::Any, Insn::GuardBitEquals { .. } => effects::Any, + Insn::GuardAnyBitSet { .. } => effects::Any, + Insn::GuardNoBitsSet { .. } => effects::Any, Insn::GuardShape { .. } => effects::Any, - Insn::GuardBlockParamProxy { .. } => effects::Any, Insn::GuardNotFrozen { .. } => effects::Any, Insn::GuardNotShared { .. } => effects::Any, Insn::GuardGreaterEq { .. } => effects::Any, @@ -1541,8 +1548,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, + Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) }, + Insn::GuardNoBitsSet { val, mask, .. 
} => { write!(f, "GuardBitNotSet {val}, {}", mask.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, - Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::GuardNotFrozen { recv, .. } => write!(f, "GuardNotFrozen {recv}"), Insn::GuardNotShared { recv, .. } => write!(f, "GuardNotShared {recv}"), Insn::GuardLess { left, right, .. } => write!(f, "GuardLess {left}, {right}"), @@ -1604,6 +1612,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), Insn::LoadPC => write!(f, "LoadPC"), Insn::LoadEC => write!(f, "LoadEC"), + &Insn::GetEP { level } => write!(f, "GetEP {level}"), Insn::GetLEP => write!(f, "GetLEP"), Insn::LoadSelf => write!(f, "LoadSelf"), &Insn::LoadField { recv, id, offset, return_type: _ } => write!(f, "LoadField {recv}, :{}@{:p}", id.contents_lossy(), self.ptr_map.map_offset(offset)), @@ -2187,6 +2196,7 @@ impl Function { | EntryPoint {..} | LoadPC | LoadEC + | GetEP {..} | GetLEP | LoadSelf | IncrCounterPtr {..} @@ -2228,8 +2238,9 @@ impl Function { &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, + &GuardAnyBitSet { val, mask, reason, state } => GuardAnyBitSet { val: find!(val), mask, reason, state }, + &GuardNoBitsSet { val, mask, reason, state } => GuardNoBitsSet { val: find!(val), mask, reason, state }, &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state }, - &GuardBlockParamProxy { level, state } => GuardBlockParamProxy { level, state: find!(state) }, &GuardNotFrozen { recv, state } => GuardNotFrozen { recv: find!(recv), state }, &GuardNotShared { recv, 
state } => GuardNotShared { recv: find!(recv), state }, &GuardGreaterEq { left, right, state } => GuardGreaterEq { left: find!(left), right: find!(right), state }, @@ -2429,7 +2440,7 @@ impl Function { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::SetClassVar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) | Insn::IncrCounterPtr { .. } - | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } + | Insn::CheckInterrupts { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::StoreField { .. } | Insn::WriteBarrier { .. } | Insn::HashAset { .. } | Insn::ArrayAset { .. } => panic!("Cannot infer type of instruction with no output: {}. See Insn::has_output().", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), @@ -2488,6 +2499,8 @@ impl Function { Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), + Insn::GuardAnyBitSet { val, .. } => self.type_of(*val), + Insn::GuardNoBitsSet { val, .. } => self.type_of(*val), Insn::GuardShape { val, .. } => self.type_of(*val), Insn::GuardNotFrozen { recv, .. } | Insn::GuardNotShared { recv, .. } => self.type_of(*recv), Insn::GuardLess { left, .. } => self.type_of(*left), @@ -2532,6 +2545,7 @@ impl Function { Insn::GetIvar { .. } => types::BasicObject, Insn::LoadPC => types::CPtr, Insn::LoadEC => types::CPtr, + Insn::GetEP { .. } => types::CPtr, Insn::GetLEP => types::CPtr, Insn::LoadSelf => types::BasicObject, &Insn::LoadField { return_type, .. } => return_type, @@ -2659,7 +2673,9 @@ impl Function { Insn::GuardType { val, .. } | Insn::GuardTypeNot { val, .. } | Insn::GuardShape { val, .. } - | Insn::GuardBitEquals { val, .. 
} => self.chase_insn(val), + | Insn::GuardBitEquals { val, .. } + | Insn::GuardAnyBitSet { val, .. } + | Insn::GuardNoBitsSet { val, .. } => self.chase_insn(val), | Insn::RefineType { val, .. } => self.chase_insn(val), _ => id, } @@ -4517,6 +4533,7 @@ impl Function { | &Insn::EntryPoint { .. } | &Insn::LoadPC | &Insn::LoadEC + | &Insn::GetEP { .. } | &Insn::GetLEP | &Insn::LoadSelf | &Insn::GetLocal { .. } @@ -4605,6 +4622,8 @@ impl Function { | &Insn::GuardType { val, state, .. } | &Insn::GuardTypeNot { val, state, .. } | &Insn::GuardBitEquals { val, state, .. } + | &Insn::GuardAnyBitSet { val, state, .. } + | &Insn::GuardNoBitsSet { val, state, .. } | &Insn::GuardShape { val, state, .. } | &Insn::GuardNotFrozen { recv: val, state } | &Insn::GuardNotShared { recv: val, state } @@ -4755,7 +4774,6 @@ impl Function { worklist.push_back(recv); worklist.push_back(val); } - &Insn::GuardBlockParamProxy { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. } | @@ -5301,12 +5319,12 @@ impl Function { | Insn::GetGlobal { .. } | Insn::LoadPC | Insn::LoadEC + | Insn::GetEP { .. } | Insn::GetLEP | Insn::LoadSelf | Insn::Snapshot { .. } | Insn::Jump { .. } | Insn::EntryPoint { .. } - | Insn::GuardBlockParamProxy { .. } | Insn::GuardSuperMethodEntry { .. } | Insn::GetBlockHandler { .. } | Insn::PatchPoint { .. } @@ -5530,6 +5548,18 @@ impl Function { Const::CPtr(_) => self.assert_subtype(insn_id, val, types::CPtr), } } + Insn::GuardAnyBitSet { val, mask, .. } + | Insn::GuardNoBitsSet { val, mask, .. } => { + match mask { + Const::CUInt8(_) | Const::CUInt16(_) | Const::CUInt32(_) | Const::CUInt64(_) + if self.is_a(val, types::CInt) || self.is_a(val, types::RubyValue) => { + Ok(()) + } + _ => { + Err(ValidationError::MiscValidationError(insn_id, "GuardAnyBitSet/GuardNoBitsSet can only compare RubyValue/CUInt or CInt/CUInt".to_string())) + } + } + } Insn::GuardLess { left, right, .. 
} | Insn::GuardGreaterEq { left, right, .. } => { self.assert_subtype(insn_id, left, types::CInt64)?; @@ -6198,7 +6228,38 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } } } - } else { + } else if opcode == YARVINSN_getblockparamproxy || opcode == YARVINSN_trace_getblockparamproxy { + if get_option!(stats) { + let iseq_insn_idx = exit_state.insn_idx; + if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(iseq_insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + + if summary.is_monomorphic() { + let obj = summary.bucket(0).class(); + if unsafe { rb_IMEMO_TYPE_P(obj, imemo_iseq) == 1} { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_iseq)); + } else if unsafe { rb_IMEMO_TYPE_P(obj, imemo_ifunc) == 1} { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_ifunc)); + } + else if obj.nil_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_nil)); + } + else if obj.symbol_p() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_symbol)); + } else if unsafe { rb_obj_is_proc(obj).test() } { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_proc)); + } + } else if summary.is_polymorphic() || summary.is_skewed_polymorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_polymorphic)); + } else if summary.is_megamorphic() || summary.is_skewed_megamorphic() { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_megamorphic)); + } + } else { + fun.push_insn(block, Insn::IncrCounter(Counter::getblockparamproxy_handler_no_profiles)); + } + } + } + else { profiles.profile_stack(&exit_state); } @@ -6593,9 +6654,39 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } YARVINSN_getblockparamproxy => { let level = get_arg(pc, 1).as_u32(); - fun.push_insn(block, Insn::GuardBlockParamProxy { level, state: 
exit_id }); - // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing - state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + + let profiled_block_type = if let Some([block_handler_distribution]) = profiles.payload.profile.get_operand_types(exit_state.insn_idx) { + let summary = TypeDistributionSummary::new(block_handler_distribution); + summary.is_monomorphic().then_some(summary.bucket(0).class()) + } else { + None + }; + + let ep = fun.push_insn(block, Insn::GetEP { level }); + let flags = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_flags), offset: SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32), return_type: types::CInt64 }); + fun.push_insn(block, Insn::GuardNoBitsSet { val: flags, mask: Const::CUInt64(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()), reason: SideExitReason::BlockParamProxyModified, state: exit_id }); + + let block_handler = fun.push_insn(block, Insn::LoadField { recv: ep, id: ID!(_env_data_index_specval), offset: SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL, return_type: types::CInt64 }); + + match profiled_block_type { + Some(ty) if ty.nil_p() => { + fun.push_insn(block, Insn::GuardBitEquals { val: block_handler, expected: Const::CInt64(VM_BLOCK_HANDLER_NONE.into()), reason: SideExitReason::BlockParamProxyNotNil, state: exit_id }); + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(Qnil) })); + } + _ => { + // This handles two cases which are nearly identical + // Block handler is a tagged pointer. Look at the tag. 
+ // VM_BH_ISEQ_BLOCK_P(): block_handler & 0x03 == 0x01 + // VM_BH_IFUNC_P(): block_handler & 0x03 == 0x03 + // So to check for either of those cases we can use: val & 0x1 == 0x1 + const _: () = assert!(RUBY_SYMBOL_FLAG & 1 == 0, "guard below rejects symbol block handlers"); + + // Bail out if the block handler is neither ISEQ nor ifunc + fun.push_insn(block, Insn::GuardAnyBitSet { val: block_handler, mask: Const::CUInt64(0x1), reason: SideExitReason::BlockParamProxyNotIseqOrIfunc, state: exit_id }); + // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); + } + } } YARVINSN_getblockparam => { fn new_branch_block( diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 70afd54022e40e..c1059094ac531a 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3875,7 +3875,7 @@ mod hir_opt_tests { eval(" def test(&block) = tap(&block) "); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -3886,11 +3886,15 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - GuardBlockParamProxy l0 - v15:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) - v17:BasicObject = Send v8, 0x1008, :tap, v15 # SendFallbackReason: Uncategorized(send) + v14:CPtr = GetEP 0 + v15:CInt64 = LoadField v14, :_env_data_index_flags@0x1000 + v16:CInt64 = GuardBitNotSet v15, CUInt64(512) + v17:CInt64 = LoadField v14, :_env_data_index_specval@0x1001 + v18:CInt64 = GuardBitSet v17, CUInt64(1) + v19:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + v21:BasicObject = Send v8, 0x1010, :tap, v19 # SendFallbackReason: Uncategorized(send) CheckInterrupts - Return v17 + Return v21 "); } @@ -6518,11 +6522,42 @@ mod hir_opt_tests { #[test] fn test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + 
def test(&block) = [].map(&block) + test { |x| x }; test { |x| x } + "#); + assert_snapshot!(hir_string("test"), @" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :block, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:ArrayExact = NewArray + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64 = GuardBitSet v18, CUInt64(1) + v20:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v22:BasicObject = Send v13, 0x1010, :map, v20 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_replace_block_param_proxy_with_nil() { eval(r#" def test(&block) = [].map(&block) test; test "#); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -6534,12 +6569,50 @@ mod hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): v13:ArrayExact = NewArray - GuardBlockParamProxy l0 - v16:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v15:CPtr = GetEP 0 + v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 + v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 + v19:CInt64[0] = GuardBitEquals v18, CInt64(0) + v20:NilClass = Const Value(nil) IncrCounter complex_arg_pass_caller_blockarg - v18:BasicObject = Send v13, 0x1008, :map, v16 # SendFallbackReason: Complex argument passing + v22:BasicObject = Send v13, 0x1008, :map, v20 # SendFallbackReason: Complex argument passing CheckInterrupts - Return v18 + Return v22 + "); + } + + #[test] + fn test_replace_block_param_proxy_with_nil_nested() { + eval(r#" + def test(&block) 
+ proc do + [].map(&block) + end + end + test; test + "#); + assert_snapshot!(hir_string_proc("test"), @" + fn block in test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact = NewArray + v12:CPtr = GetEP 1 + v13:CInt64 = LoadField v12, :_env_data_index_flags@0x1000 + v14:CInt64 = GuardBitNotSet v13, CUInt64(512) + v15:CInt64 = LoadField v12, :_env_data_index_specval@0x1001 + v16:CInt64 = GuardBitSet v15, CUInt64(1) + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + IncrCounter complex_arg_pass_caller_blockarg + v19:BasicObject = Send v10, 0x1010, :map, v17 # SendFallbackReason: Complex argument passing + CheckInterrupts + Return v19 "); } diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index e0b0129ea1ce5a..dbab964976f91a 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2038,7 +2038,7 @@ pub mod hir_build_tests { eval(" def test(a, ...) = foo(a, ...) 
"); - assert_snapshot!(hir_string("test"), @r" + assert_snapshot!(hir_string("test"), @" fn test@:2: bb0(): EntryPoint interpreter @@ -2056,8 +2056,12 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:ArrayExact, v19:BasicObject, v20:BasicObject, v21:NilClass): v28:ArrayExact = ToArray v18 PatchPoint NoEPEscape(test) - GuardBlockParamProxy l0 - v34:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v33:CPtr = GetEP 0 + v34:CInt64 = LoadField v33, :_env_data_index_flags@0x1000 + v35:CInt64 = GuardBitNotSet v34, CUInt64(512) + v36:CInt64 = LoadField v33, :_env_data_index_specval@0x1001 + v37:CInt64 = GuardBitSet v36, CUInt64(1) + v38:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) SideExit UnhandledYARVInsn(splatkw) "); } @@ -3409,7 +3413,7 @@ pub mod hir_build_tests { let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); let function = iseq_to_hir(iseq).unwrap(); - assert_snapshot!(hir_string_function(&function), @r" + assert_snapshot!(hir_string_function(&function), @" fn open@: bb0(): EntryPoint interpreter @@ -3428,20 +3432,24 @@ pub mod hir_build_tests { bb2(v16:BasicObject, v17:BasicObject, v18:BasicObject, v19:BasicObject, v20:BasicObject, v21:NilClass): v25:BasicObject = InvokeBuiltin dir_s_open, v16, v17, v18 PatchPoint NoEPEscape(open) - GuardBlockParamProxy l0 - v32:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v31:CPtr = GetEP 0 + v32:CInt64 = LoadField v31, :_env_data_index_flags@0x1000 + v33:CInt64 = GuardBitNotSet v32, CUInt64(512) + v34:CInt64 = LoadField v31, :_env_data_index_specval@0x1001 + v35:CInt64 = GuardBitSet v34, CUInt64(1) + v36:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) + CheckInterrupts + v39:CBool[true] = Test v36 + v40 = RefineType v36, Falsy + IfFalse v39, bb3(v16, v17, v18, v19, v20, v25) + v42:HeapObject[BlockParamProxy] = 
RefineType v36, Truthy + v46:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) + v49:BasicObject = InvokeBuiltin dir_s_close, v16, v25 CheckInterrupts - v35:CBool[true] = Test v32 - v36 = RefineType v32, Falsy - IfFalse v35, bb3(v16, v17, v18, v19, v20, v25) - v38:HeapObject[BlockParamProxy] = RefineType v32, Truthy - v42:BasicObject = InvokeBlock, v25 # SendFallbackReason: Uncategorized(invokeblock) - v45:BasicObject = InvokeBuiltin dir_s_close, v16, v25 - CheckInterrupts - Return v42 - bb3(v51, v52, v53, v54, v55, v56): + Return v46 + bb3(v55, v56, v57, v58, v59, v60): CheckInterrupts - Return v56 + Return v60 "); } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index c1feb759529e15..ad6da06c71e91b 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -91,6 +91,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), YARVINSN_invokeblock => profile_block_handler(profiler, profile), + YARVINSN_getblockparamproxy => profile_getblockparamproxy(profiler, profile), YARVINSN_invokesuper => profile_invokesuper(profiler, profile), YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); @@ -155,6 +156,22 @@ fn profile_block_handler(profiler: &mut Profiler, profile: &mut IseqProfile) { types[0].observe(ty); } +fn profile_getblockparamproxy(profiler: &mut Profiler, profile: &mut IseqProfile) { + let types = &mut profile.opnd_types[profiler.insn_idx]; + if types.is_empty() { + types.resize(1, TypeDistribution::new()); + } + + let level = profiler.insn_opnd(1).as_u32(); + let ep = unsafe { get_cfp_ep_level(profiler.cfp, level) }; + let block_handler = unsafe { *ep.offset(VM_ENV_DATA_INDEX_SPECVAL as isize) }; + let untagged = unsafe { rb_vm_untag_block_handler(block_handler) }; + + let ty = ProfiledType::object(untagged); + 
VALUE::from(profiler.iseq).write_barrier(ty.class()); + types[0].observe(ty); +} + fn profile_invokesuper(profiler: &mut Profiler, profile: &mut IseqProfile) { let cme = unsafe { rb_vm_frame_method_entry(profiler.cfp) }; let cme_value = VALUE(cme as usize); // CME is a T_IMEMO, which is a VALUE diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index bb11b96dd9a403..367a19fc32fc67 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -210,6 +210,7 @@ make_counters! { exit_stackoverflow, exit_block_param_proxy_modified, exit_block_param_proxy_not_iseq_or_ifunc, + exit_block_param_proxy_not_nil, exit_block_param_wb_required, exit_too_many_keyword_parameters, } @@ -422,6 +423,15 @@ make_counters! { invokeblock_handler_polymorphic, invokeblock_handler_megamorphic, invokeblock_handler_no_profiles, + + getblockparamproxy_handler_iseq, + getblockparamproxy_handler_ifunc, + getblockparamproxy_handler_symbol, + getblockparamproxy_handler_proc, + getblockparamproxy_handler_nil, + getblockparamproxy_handler_polymorphic, + getblockparamproxy_handler_megamorphic, + getblockparamproxy_handler_no_profiles, } /// Increase a counter by a specified amount @@ -558,6 +568,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { StackOverflow => exit_stackoverflow, BlockParamProxyModified => exit_block_param_proxy_modified, BlockParamProxyNotIseqOrIfunc => exit_block_param_proxy_not_iseq_or_ifunc, + BlockParamProxyNotNil => exit_block_param_proxy_not_nil, BlockParamWbRequired => exit_block_param_wb_required, TooManyKeywordParameters => exit_too_many_keyword_parameters, PatchPoint(Invariant::BOPRedefined { .. }) From 33d828470bc86b494fe9b8f6b684d7e8153f3b95 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Thu, 29 Jan 2026 13:02:33 -0500 Subject: [PATCH 61/77] ZJIT: Support polymorphic send without block (#15949) Break out the different cases into different blocks in the bytecode to HIR parser. 
Use a `RefineType` to plumb each case's type through so that type specialization can see it. Then join the logic back to the rest of the current block after each case's send. ZJIT statistics for the lobsters benchmark before this change:
``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (58.7% of total 4,476,259): Hash#fetch: 849,219 (19.0%) String#start_with?: 328,017 ( 7.3%) Regexp#match?: 148,149 ( 3.3%) Hash#key?: 135,034 ( 3.0%) Kernel#is_a?: 110,030 ( 2.5%) Set#include?: 97,934 ( 2.2%) Integer#===: 96,952 ( 2.2%) Process.clock_gettime: 92,795 ( 2.1%) String#sub!: 84,940 ( 1.9%) String.new: 80,730 ( 1.8%) SQLite3::Statement#done?: 73,532 ( 1.6%) SQLite3::Statement#step: 73,532 ( 1.6%) Time#plus_without_duration: 66,724 ( 1.5%) String#<<: 63,954 ( 1.4%) Time#to_i: 60,817 ( 1.4%) Hash#delete: 60,664 ( 1.4%) Time#subsec: 60,363 ( 1.3%) String#hash: 51,261 ( 1.1%) IO#read: 47,753 ( 1.1%) String#to_sym: 43,915 ( 1.0%) Top-20 calls to C functions from JIT code (83.7% of total 35,570,418): rb_vm_opt_send_without_block: 10,516,746 (29.6%) rb_vm_env_write: 2,382,117 ( 6.7%) rb_zjit_writebarrier_check_immediate: 2,241,285 ( 6.3%) rb_hash_aref: 2,189,588 ( 6.2%) rb_vm_getinstancevariable: 1,762,596 ( 5.0%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 4.8%) rb_vm_send: 1,460,754 ( 4.1%) rb_hash_aset: 1,151,302 ( 3.2%) rb_vm_setinstancevariable: 1,029,286 ( 2.9%) rb_obj_is_kind_of: 1,000,979 ( 2.8%) rb_vm_opt_getconstant_path: 623,490 ( 1.8%) rb_vm_invokesuper: 595,831 ( 1.7%) Hash#fetch: 562,212 ( 1.6%) rb_vm_invokeblock: 545,744 ( 1.5%) rb_class_allocate_instance: 422,454 ( 1.2%) rb_ec_ary_new_from_values: 388,035 ( 1.1%) String#start_with?: 328,017 ( 0.9%) rb_hash_new_with_size: 289,130 ( 0.8%) fetch: 287,007 ( 0.8%) rb_vm_sendforward: 284,183 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of total 102,413): optimized_send: 92,837 (90.6%) null: 8,595 ( 8.4%) optimized_block_call: 981 ( 1.0%) Top-3 not optimized method types for super (100.0% of total 517,931): cfunc: 489,746 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with uncategorized 
fallback reason (100.0% of total 868,223): invokeblock: 545,744 (62.9%) sendforward: 284,183 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,583 ( 1.0%) Top-20 send fallback reasons (100.0% of total 13,432,971): send_without_block_polymorphic: 4,825,641 (35.9%) singleton_class_seen: 3,257,447 (24.2%) send_without_block_no_profiles: 1,906,060 (14.2%) uncategorized: 868,223 ( 6.5%) send_no_profiles: 806,168 ( 6.0%) one_or_more_complex_arg_pass: 537,965 ( 4.0%) super_not_optimized_method_type: 517,931 ( 3.9%) send_without_block_megamorphic: 158,893 ( 1.2%) too_many_args_for_lir: 127,160 ( 0.9%) send_polymorphic: 112,628 ( 0.8%) send_without_block_not_optimized_need_permission: 100,041 ( 0.7%) send_without_block_not_optimized_method_type_optimized: 93,818 ( 0.7%) super_complex_args_pass: 34,022 ( 0.3%) super_target_complex_args_pass: 25,536 ( 0.2%) super_polymorphic: 16,853 ( 0.1%) obj_to_string_not_string: 13,794 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_not_optimized_method_type: 8,595 ( 0.1%) send_without_block_direct_keyword_mismatch: 5,568 ( 0.0%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,029,286): not_monomorphic: 992,723 (96.4%) not_t_object: 21,354 ( 2.1%) too_complex: 15,188 ( 1.5%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 1,790,794): not_monomorphic: 1,750,108 (97.7%) too_complex: 40,686 ( 2.3%) Top-3 definedivar fallback reasons (100.0% of total 81,713): not_monomorphic: 80,197 (98.1%) too_complex: 796 ( 1.0%) not_t_object: 720 ( 0.9%) Top-6 invokeblock handler (100.0% of total 545,744): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,915 (39.9%) monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 652,565): param_forwardable: 246,421 (37.8%) param_block: 198,808 (30.5%) param_rest: 
101,529 (15.6%) param_kwrest: 44,809 ( 6.9%) caller_blockarg: 24,596 ( 3.8%) caller_splat: 15,969 ( 2.4%) caller_kw_splat: 14,227 ( 2.2%) caller_kwarg: 6,206 ( 1.0%) Top-1 compile error reasons (100.0% of total 38,981): exception_handler: 38,981 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,904): throw: 39,721 (52.3%) invokebuiltin: 35,772 (47.1%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,770,125): guard_shape_failure: 1,927,218 (51.1%) guard_type_failure: 1,395,315 (37.0%) block_param_proxy_not_iseq_or_ifunc: 257,894 ( 6.8%) unhandled_hir_insn: 75,904 ( 2.0%) compile_error: 38,981 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) patchpoint_method_redefined: 1,212 ( 0.0%) expandarray_failure: 837 ( 0.0%) guard_super_method_entry: 737 ( 0.0%) guard_less_failure: 163 ( 0.0%) interrupt: 49 ( 0.0%) send_count: 46,003,239 dynamic_send_count: 13,432,971 (29.2%) optimized_send_count: 32,570,268 (70.8%) dynamic_setivar_count: 1,029,286 ( 2.2%) dynamic_getivar_count: 1,790,794 ( 3.9%) dynamic_definedivar_count: 81,713 ( 0.2%) iseq_optimized_send_count: 15,117,301 (32.9%) inline_cfunc_optimized_send_count: 11,837,918 (25.7%) inline_iseq_optimized_send_count: 884,606 ( 1.9%) non_variadic_cfunc_optimized_send_count: 2,597,998 ( 5.6%) variadic_cfunc_optimized_send_count: 2,132,445 ( 4.6%) compiled_iseq_count: 5,259 failed_iseq_count: 0 compile_time: 1,409ms profile_time: 10ms gc_time: 11ms invalidation_time: 77ms 
vm_write_pc_count: 40,924,587 vm_write_sp_count: 40,924,587 vm_write_locals_count: 39,740,467 vm_write_stack_count: 39,740,467 vm_write_to_parent_iseq_local_count: 306,481 vm_read_from_parent_iseq_local_count: 4,841,855 guard_type_count: 48,810,089 guard_type_exit_ratio: 2.9% guard_shape_count: 19,485,073 guard_shape_exit_ratio: 9.9% code_region_bytes: 27,262,976 zjit_alloc_bytes: 34,517,324 total_mem_bytes: 61,780,300 side_exit_count: 3,770,125 total_insn_count: 273,152,243 vm_insn_count: 43,926,931 zjit_insn_count: 229,225,312 ratio_in_zjit: 83.9% ```
ZJIT statistics for the lobsters benchmark after this change:
``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (61.7% of total 5,220,252): Hash#fetch: 1,274,409 (24.4%) String#start_with?: 328,017 ( 6.3%) Regexp#match?: 147,525 ( 2.8%) Hash#key?: 139,198 ( 2.7%) Kernel#is_a?: 110,178 ( 2.1%) Class#allocate: 107,143 ( 2.1%) Hash#delete: 106,307 ( 2.0%) Class#superclass: 98,165 ( 1.9%) Set#include?: 97,934 ( 1.9%) Integer#===: 95,874 ( 1.8%) Process.clock_gettime: 92,795 ( 1.8%) String#sub!: 80,732 ( 1.5%) String.new: 80,730 ( 1.5%) SQLite3::Statement#done?: 73,532 ( 1.4%) SQLite3::Statement#step: 73,532 ( 1.4%) Time#plus_without_duration: 66,724 ( 1.3%) String#<<: 63,954 ( 1.2%) Kernel#dup: 62,590 ( 1.2%) Time#to_i: 60,814 ( 1.2%) Time#subsec: 60,363 ( 1.2%) Top-20 calls to C functions from JIT code (80.8% of total 33,681,248): rb_vm_opt_send_without_block: 6,869,559 (20.4%) rb_hash_aref: 2,487,056 ( 7.4%) rb_vm_env_write: 2,372,693 ( 7.0%) rb_zjit_writebarrier_check_immediate: 2,238,890 ( 6.6%) rb_vm_getinstancevariable: 1,861,700 ( 5.5%) rb_ivar_get_at_no_ractor_check: 1,702,246 ( 5.1%) rb_vm_send: 1,468,202 ( 4.4%) rb_hash_aset: 1,267,469 ( 3.8%) rb_obj_is_kind_of: 1,126,363 ( 3.3%) rb_vm_setinstancevariable: 1,055,131 ( 3.1%) Hash#fetch: 987,402 ( 2.9%) rb_vm_opt_getconstant_path: 641,779 ( 1.9%) rb_vm_invokesuper: 603,416 ( 1.8%) rb_vm_invokeblock: 545,743 ( 1.6%) rb_class_allocate_instance: 415,748 ( 1.2%) rb_ec_ary_new_from_values: 380,080 ( 1.1%) String#start_with?: 328,017 ( 1.0%) rb_hash_new_with_size: 289,172 ( 0.9%) fetch: 287,007 ( 0.9%) rb_vm_sendforward: 283,885 ( 0.8%) Top-1 not optimized method types for send (100.0% of total 428): null: 428 (100.0%) Top-3 not optimized method types for send_without_block (100.0% of total 202,329): optimized_send: 190,504 (94.2%) null: 10,844 ( 5.4%) optimized_block_call: 981 ( 0.5%) Top-3 not optimized method types for super (100.0% of total 517,421): cfunc: 489,236 (94.6%) alias: 26,398 ( 5.1%) attrset: 1,787 ( 0.3%) Top-4 instructions with 
uncategorized fallback reason (100.0% of total 867,452): invokeblock: 545,743 (62.9%) sendforward: 283,885 (32.7%) invokesuperforward: 29,713 ( 3.4%) opt_send_without_block: 8,111 ( 0.9%) Top-20 send fallback reasons (100.0% of total 9,800,518): singleton_class_seen: 3,293,078 (33.6%) send_without_block_no_profiles: 2,142,301 (21.9%) uncategorized: 867,452 ( 8.9%) send_no_profiles: 820,538 ( 8.4%) send_without_block_polymorphic: 780,065 ( 8.0%) one_or_more_complex_arg_pass: 556,514 ( 5.7%) super_not_optimized_method_type: 517,421 ( 5.3%) send_without_block_not_optimized_method_type_optimized: 191,485 ( 2.0%) send_without_block_megamorphic: 161,550 ( 1.6%) too_many_args_for_lir: 127,190 ( 1.3%) send_polymorphic: 111,290 ( 1.1%) send_without_block_not_optimized_need_permission: 99,526 ( 1.0%) super_polymorphic: 45,651 ( 0.5%) super_complex_args_pass: 33,748 ( 0.3%) obj_to_string_not_string: 13,794 ( 0.1%) send_without_block_not_optimized_method_type: 10,844 ( 0.1%) argc_param_mismatch: 9,927 ( 0.1%) send_without_block_direct_keyword_mismatch: 6,336 ( 0.1%) super_target_complex_args_pass: 5,108 ( 0.1%) send_megamorphic: 4,525 ( 0.0%) Top-4 setivar fallback reasons (100.0% of total 1,123,837): not_monomorphic: 1,087,274 (96.7%) not_t_object: 21,354 ( 1.9%) too_complex: 15,188 ( 1.4%) new_shape_needs_extension: 21 ( 0.0%) Top-2 getivar fallback reasons (100.0% of total 2,132,203): not_monomorphic: 2,092,243 (98.1%) too_complex: 39,960 ( 1.9%) Top-3 definedivar fallback reasons (100.0% of total 107,264): not_monomorphic: 105,748 (98.6%) too_complex: 796 ( 0.7%) not_t_object: 720 ( 0.7%) Top-6 invokeblock handler (100.0% of total 545,743): monomorphic_iseq: 249,809 (45.8%) polymorphic: 217,914 (39.9%) monomorphic_ifunc: 46,244 ( 8.5%) monomorphic_other: 27,938 ( 5.1%) megamorphic: 2,943 ( 0.5%) no_profiles: 895 ( 0.2%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 651,185): param_forwardable: 233,989 (35.9%) param_block: 205,158 (31.5%) 
param_rest: 100,319 (15.4%) param_kwrest: 44,596 ( 6.8%) caller_blockarg: 21,863 ( 3.4%) caller_kw_splat: 20,970 ( 3.2%) caller_splat: 18,106 ( 2.8%) caller_kwarg: 6,184 ( 0.9%) Top-1 compile error reasons (100.0% of total 38,980): exception_handler: 38,980 (100.0%) Top-5 unhandled YARV insns (100.0% of total 4,154): getconstant: 2,566 (61.8%) checkmatch: 929 (22.4%) setblockparam: 443 (10.7%) once: 171 ( 4.1%) expandarray: 45 ( 1.1%) Top-3 unhandled HIR insns (100.0% of total 75,633): throw: 39,447 (52.2%) invokebuiltin: 35,775 (47.3%) array_max: 411 ( 0.5%) Top-20 side exit reasons (100.0% of total 3,734,975): guard_shape_failure: 1,908,302 (51.1%) guard_type_failure: 1,391,624 (37.3%) block_param_proxy_not_iseq_or_ifunc: 246,820 ( 6.6%) unhandled_hir_insn: 75,633 ( 2.0%) compile_error: 38,980 ( 1.0%) patchpoint_stable_constant_names: 25,375 ( 0.7%) block_param_proxy_modified: 13,713 ( 0.4%) fixnum_lshift_overflow: 10,085 ( 0.3%) fixnum_mult_overflow: 8,550 ( 0.2%) unhandled_yarv_insn: 4,154 ( 0.1%) unhandled_block_arg: 2,548 ( 0.1%) unhandled_newarray_send_pack: 2,322 ( 0.1%) patchpoint_no_singleton_class: 2,008 ( 0.1%) patchpoint_no_ep_escape: 1,683 ( 0.0%) obj_to_string_fallback: 1,358 ( 0.0%) expandarray_failure: 837 ( 0.0%) patchpoint_method_redefined: 710 ( 0.0%) guard_less_failure: 163 ( 0.0%) guard_super_method_entry: 53 ( 0.0%) interrupt: 38 ( 0.0%) send_count: 45,128,693 dynamic_send_count: 9,800,518 (21.7%) optimized_send_count: 35,328,175 (78.3%) dynamic_setivar_count: 1,123,837 ( 2.5%) dynamic_getivar_count: 2,132,203 ( 4.7%) dynamic_definedivar_count: 107,264 ( 0.2%) iseq_optimized_send_count: 15,891,453 (35.2%) inline_cfunc_optimized_send_count: 12,866,297 (28.5%) inline_iseq_optimized_send_count: 1,102,971 ( 2.4%) non_variadic_cfunc_optimized_send_count: 2,857,775 ( 6.3%) variadic_cfunc_optimized_send_count: 2,609,679 ( 5.8%) compiled_iseq_count: 5,268 failed_iseq_count: 0 compile_time: 1,558ms profile_time: 10ms gc_time: 13ms invalidation_time: 
84ms vm_write_pc_count: 39,300,901 vm_write_sp_count: 39,300,901 vm_write_locals_count: 38,133,357 vm_write_stack_count: 38,133,357 vm_write_to_parent_iseq_local_count: 305,249 vm_read_from_parent_iseq_local_count: 4,818,083 guard_type_count: 48,036,224 guard_type_exit_ratio: 2.9% guard_shape_count: 19,302,903 guard_shape_exit_ratio: 9.9% code_region_bytes: 29,491,200 zjit_alloc_bytes: 34,932,040 total_mem_bytes: 64,423,240 side_exit_count: 3,734,975 total_insn_count: 272,964,960 vm_insn_count: 46,583,034 zjit_insn_count: 226,381,926 ratio_in_zjit: 82.9% ```
--- zjit/src/codegen.rs | 64 +++++++++++++++++++ zjit/src/distribution.rs | 6 +- zjit/src/hir.rs | 91 +++++++++++++++++++++++++++ zjit/src/hir/opt_tests.rs | 128 +++++++++++++++++++++++++++++++++++++- zjit/src/stats.rs | 2 + 5 files changed, 288 insertions(+), 3 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 9276d0af6b81e7..41da154c1ae937 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -526,6 +526,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::UnboxFixnum { val } => gen_unbox_fixnum(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::RefineType { val, .. } => opnd!(val), + Insn::HasType { val, expected } => gen_has_type(asm, opnd!(val), *expected), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), &Insn::GuardBitEquals { val, expected, reason, state } => gen_guard_bit_equals(jit, asm, opnd!(val), expected, reason, &function.frame_state(state)), @@ -2187,6 +2188,69 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.csel_e(0.into(), 1.into()) } +fn gen_has_type(asm: &mut Assembler, val: lir::Opnd, ty: Type) -> lir::Opnd { + if ty.is_subtype(types::Fixnum) { + asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); + asm.csel_nz(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::Flonum) { + // Flonum: (val & RUBY_FLONUM_MASK) == RUBY_FLONUM_FLAG + let masked = asm.and(val, Opnd::UImm(RUBY_FLONUM_MASK as u64)); + asm.cmp(masked, Opnd::UImm(RUBY_FLONUM_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::StaticSymbol) { + // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG + // Use 8-bit comparison like YJIT does. 
GuardType should not be used + // for a known VALUE, which with_num_bits() does not support. + asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::NilClass) { + asm.cmp(val, Qnil.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::TrueClass) { + asm.cmp(val, Qtrue.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_subtype(types::FalseClass) { + asm.cmp(val, Qfalse.into()); + asm.csel_e(Opnd::Imm(1), Opnd::Imm(0)) + } else if ty.is_immediate() { + // All immediate types' guard should have been handled above + panic!("unexpected immediate guard type: {ty}"); + } else if let Some(expected_class) = ty.runtime_exact_ruby_class() { + // If val isn't in a register, load it to use it as the base of Opnd::mem later. + // TODO: Max thinks codegen should not care about the shapes of the operands except to create them. (Shopify/ruby#685) + let val = match val { + Opnd::Reg(_) | Opnd::VReg { .. 
} => val, + _ => asm.load(val), + }; + + let ret_label = asm.new_label("true"); + let false_label = asm.new_label("false"); + + // Check if it's a special constant + asm.test(val, (RUBY_IMMEDIATE_MASK as u64).into()); + asm.jnz(false_label.clone()); + + // Check if it's false + asm.cmp(val, Qfalse.into()); + asm.je(false_label.clone()); + + // Load the class from the object's klass field + let klass = asm.load(Opnd::mem(64, val, RUBY_OFFSET_RBASIC_KLASS)); + asm.cmp(klass, Opnd::Value(expected_class)); + asm.jmp(ret_label.clone()); + + // If we get here then the value was false, unset the Z flag + // so that csel_e will select false instead of true + asm.write_label(false_label); + asm.test(Opnd::UImm(1), Opnd::UImm(1)); + + asm.write_label(ret_label); + asm.csel_e(Opnd::UImm(1), Opnd::Imm(0)) + } else { + unimplemented!("unsupported type: {ty}"); + } +} + /// Compile a type check with a side exit fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { gen_incr_counter(asm, Counter::guard_type_count); diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs index 2c6ffb3ae6fff0..9b3920396a13b9 100644 --- a/zjit/src/distribution.rs +++ b/zjit/src/distribution.rs @@ -69,7 +69,7 @@ enum DistributionKind { SkewedMegamorphic, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DistributionSummary { kind: DistributionKind, buckets: [T; N], @@ -134,6 +134,10 @@ impl Distributi assert!(idx < N, "index {idx} out of bounds for buckets[{N}]"); self.buckets[idx] } + + pub fn buckets(&self) -> &[T] { + &self.buckets + } } #[cfg(test)] diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 32519a5b978847..24d04f59663291 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -626,6 +626,7 @@ pub enum SendFallbackReason { SendWithoutBlockNotOptimizedNeedPermission, SendWithoutBlockBopRedefined, SendWithoutBlockOperandsNotFixnum, + SendWithoutBlockPolymorphicFallback, SendDirectKeywordMismatch, 
SendDirectKeywordCountMismatch, SendDirectMissingKeyword, @@ -687,6 +688,7 @@ impl Display for SendFallbackReason { SendNotOptimizedNeedPermission => write!(f, "Send: method private or protected and no FCALL"), SendWithoutBlockBopRedefined => write!(f, "SendWithoutBlock: basic operation was redefined"), SendWithoutBlockOperandsNotFixnum => write!(f, "SendWithoutBlock: operands are not fixnums"), + SendWithoutBlockPolymorphicFallback => write!(f, "SendWithoutBlock: polymorphic fallback"), SendDirectKeywordMismatch => write!(f, "SendDirect: keyword mismatch"), SendDirectKeywordCountMismatch => write!(f, "SendDirect: keyword count mismatch"), SendDirectMissingKeyword => write!(f, "SendDirect: missing keyword"), @@ -1016,6 +1018,8 @@ pub enum Insn { /// Refine the known type information of with additional type information. /// Computes the intersection of the existing type and the new type. RefineType { val: InsnId, new_type: Type }, + /// Return CBool[true] if val has type Type and CBool[false] otherwise. + HasType { val: InsnId, expected: Type }, /// Side-exit if val doesn't have the expected type. GuardType { val: InsnId, guard_type: Type, state: InsnId }, @@ -1242,6 +1246,7 @@ impl Insn { Insn::CheckInterrupts { .. } => effects::Any, Insn::InvokeProc { .. } => effects::Any, Insn::RefineType { .. } => effects::Empty, + Insn::HasType { .. } => effects::Empty, } } @@ -1546,6 +1551,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::RefineType { val, new_type, .. } => { write!(f, "RefineType {val}, {}", new_type.print(self.ptr_map)) }, + Insn::HasType { val, expected, .. } => { write!(f, "HasType {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. 
} => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) }, @@ -2235,6 +2241,7 @@ impl Function { &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, &RefineType { val, new_type } => RefineType { val: find!(val), new_type }, + &HasType { val, expected } => HasType { val: find!(val), expected }, &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type, state }, &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, reason, state } => GuardBitEquals { val: find!(val), expected, reason, state }, @@ -2497,6 +2504,7 @@ impl Function { &Insn::CCallVariadic { return_type, .. } => return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), Insn::RefineType { val, new_type, .. } => self.type_of(*val).intersection(*new_type), + Insn::HasType { .. } => types::CBool, Insn::GuardTypeNot { .. } => types::BasicObject, Insn::GuardBitEquals { val, expected, .. } => self.type_of(*val).intersection(Type::from_const(*expected)), Insn::GuardAnyBitSet { val, .. 
} => self.type_of(*val), @@ -2860,6 +2868,22 @@ impl Function { self.resolve_receiver_type_from_profile(recv, insn_idx) } + fn polymorphic_summary(&self, profiles: &ProfileOracle, recv: InsnId, insn_idx: usize) -> Option { + let Some(entries) = profiles.types.get(&insn_idx) else { + return None; + }; + let recv = self.chase_insn(recv); + for (entry_insn, entry_type_summary) in entries { + if self.union_find.borrow().find_const(*entry_insn) == recv { + if entry_type_summary.is_polymorphic() { + return Some(entry_type_summary.clone()); + } + return None; + } + } + None + } + /// Resolve the receiver type for method dispatch optimization from profile data. /// /// Returns: @@ -4608,6 +4632,7 @@ impl Function { worklist.push_back(state); } | &Insn::RefineType { val, .. } + | &Insn::HasType { val, .. } | &Insn::Return { val } | &Insn::Test { val } | &Insn::SetLocal { val, .. } @@ -5579,6 +5604,7 @@ impl Function { self.assert_subtype(insn_id, class, types::Class) } Insn::RefineType { .. } => Ok(()), + Insn::HasType { val, .. 
} => self.assert_subtype(insn_id, val, types::BasicObject), } } @@ -6939,6 +6965,71 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; + { + fn new_branch_block( + fun: &mut Function, + cd: *const rb_call_data, + argc: usize, + opcode: u32, + new_type: Type, + insn_idx: u32, + exit_state: &FrameState, + locals_count: usize, + stack_count: usize, + join_block: BlockId, + ) -> BlockId { + let block = fun.new_block(insn_idx); + let self_param = fun.push_insn(block, Insn::Param); + let mut state = exit_state.clone(); + state.locals.clear(); + state.stack.clear(); + state.locals.extend((0..locals_count).map(|_| fun.push_insn(block, Insn::Param))); + state.stack.extend((0..stack_count).map(|_| fun.push_insn(block, Insn::Param))); + let snapshot = fun.push_insn(block, Insn::Snapshot { state: state.clone() }); + let args = state.stack_pop_n(argc).unwrap(); + let recv = state.stack_pop().unwrap(); + let refined_recv = fun.push_insn(block, Insn::RefineType { val: recv, new_type }); + state.replace(recv, refined_recv); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv: refined_recv, cd, args, state: snapshot, reason: Uncategorized(opcode) }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + block + } + let branch_insn_idx = exit_state.insn_idx as u32; + let locals_count = state.locals.len(); + let stack_count = state.stack.len(); + let recv = state.stack_topn(argc as usize)?; // args are on top + let entry_args = state.as_args(self_param); + if let Some(summary) = fun.polymorphic_summary(&profiles, recv, exit_state.insn_idx) { + let join_block = insn_idx_to_block.get(&insn_idx).copied().unwrap_or_else(|| fun.new_block(insn_idx)); + // TODO(max): Only iterate over unique classes, not unique (class, shape) pairs. 
+ for &profiled_type in summary.buckets() { + if profiled_type.is_empty() { break; } + let expected = Type::from_profiled_type(profiled_type); + let has_type = fun.push_insn(block, Insn::HasType { val: recv, expected }); + let iftrue_block = + new_branch_block(&mut fun, cd, argc as usize, opcode, expected, branch_insn_idx, &exit_state, locals_count, stack_count, join_block); + let target = BranchEdge { target: iftrue_block, args: entry_args.clone() }; + fun.push_insn(block, Insn::IfTrue { val: has_type, target }); + } + // Continue compilation from the join block at the next instruction. + // Make a copy of the current state without the args (pop the receiver + // and push the result) because we just use the locals/stack sizes to + // make the right number of Params + let mut join_state = state.clone(); + join_state.stack_pop_n(argc as usize)?; + queue.push_back((join_state, join_block, insn_idx, local_inval)); + // In the fallthrough case, do a generic interpreter send and then join. + let args = state.stack_pop_n(argc as usize)?; + let recv = state.stack_pop()?; + let reason = SendWithoutBlockPolymorphicFallback; + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason }); + state.stack_push(send); + fun.push_insn(block, Insn::Jump(BranchEdge { target: join_block, args: state.as_args(self_param) })); + break; // End the block + } + } + let args = state.stack_pop_n(argc as usize)?; let recv = state.stack_pop()?; let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id, reason: Uncategorized(opcode) }); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index c1059094ac531a..06bf561d166d62 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -6407,9 +6407,29 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): - v14:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: Uncategorized(opt_send_without_block) + 
v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v44:BasicObject = GetIvar v19, :@foo + Jump bb3(v15, v16, v44) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + v28:HeapObject[class_exact:C] = RefineType v26, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter getivar_fallback_not_monomorphic + v47:BasicObject = GetIvar v28, :@foo + Jump bb3(v24, v25, v47) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): CheckInterrupts - Return v14 + Return v36 "); } @@ -11487,4 +11507,108 @@ mod hir_opt_tests { Return v47 "); } + + #[test] + fn specialize_polymorphic_send_iseq() { + set_call_threshold(4); + eval(" + class C + def foo = 3 + end + + class D + def foo = 4 + end + + def test o + o.foo + 2 + end + + test C.new; test D.new; test C.new; test D.new + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:11: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, HeapObject[class_exact:D] + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + 
bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + IncrCounter inline_iseq_optimized_send_count + v54:Fixnum[3] = Const Value(3) + Jump bb3(v15, v16, v54) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + PatchPoint NoSingletonClass(D@0x1038) + PatchPoint MethodRedefined(D@0x1038, foo@0x1008, cme:0x1040) + IncrCounter inline_iseq_optimized_send_count + v56:Fixnum[4] = Const Value(4) + Jump bb3(v24, v25, v56) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + v39:Fixnum[2] = Const Value(2) + PatchPoint MethodRedefined(Integer@0x1068, +@0x1070, cme:0x1078) + v59:Fixnum = GuardType v36, Fixnum + v60:Fixnum = FixnumAdd v59, v39 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v60 + "); + } + + #[test] + fn specialize_polymorphic_send_with_immediate() { + set_call_threshold(4); + eval(" + class C; end + + def test o + o.itself + end + + test C.new; test 3; test C.new; test 4 + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:5: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :o, l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:CBool = HasType v9, HeapObject[class_exact:C] + IfTrue v14, bb4(v8, v9, v9) + v23:CBool = HasType v9, Fixnum + IfTrue v23, bb5(v8, v9, v9) + v32:BasicObject = SendWithoutBlock v9, :itself # SendFallbackReason: SendWithoutBlock: polymorphic fallback + Jump bb3(v8, v9, v32) + bb4(v15:BasicObject, v16:BasicObject, v17:BasicObject): + v19:HeapObject[class_exact:C] = RefineType v17, HeapObject[class_exact:C] + PatchPoint NoSingletonClass(C@0x1000) + PatchPoint MethodRedefined(C@0x1000, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v15, v16, v19) + bb5(v24:BasicObject, v25:BasicObject, v26:BasicObject): + 
v28:Fixnum = RefineType v26, Fixnum + PatchPoint MethodRedefined(Integer@0x1038, itself@0x1008, cme:0x1010) + IncrCounter inline_cfunc_optimized_send_count + Jump bb3(v24, v25, v28) + bb3(v34:BasicObject, v35:BasicObject, v36:BasicObject): + CheckInterrupts + Return v36 + "); + } } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 367a19fc32fc67..6fc754007f6a82 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -229,6 +229,7 @@ make_counters! { send_fallback_too_many_args_for_lir, send_fallback_send_without_block_bop_redefined, send_fallback_send_without_block_operands_not_fixnum, + send_fallback_send_without_block_polymorphic_fallback, send_fallback_send_without_block_direct_keyword_mismatch, send_fallback_send_without_block_direct_keyword_count_mismatch, send_fallback_send_without_block_direct_missing_keyword, @@ -610,6 +611,7 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter TooManyArgsForLir => send_fallback_too_many_args_for_lir, SendWithoutBlockBopRedefined => send_fallback_send_without_block_bop_redefined, SendWithoutBlockOperandsNotFixnum => send_fallback_send_without_block_operands_not_fixnum, + SendWithoutBlockPolymorphicFallback => send_fallback_send_without_block_polymorphic_fallback, SendDirectKeywordMismatch => send_fallback_send_without_block_direct_keyword_mismatch, SendDirectKeywordCountMismatch => send_fallback_send_without_block_direct_keyword_count_mismatch, SendDirectMissingKeyword => send_fallback_send_without_block_direct_missing_keyword, From b51a11268840d257dade3dcb573b680c004a5a1b Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Thu, 29 Jan 2026 20:12:01 +0100 Subject: [PATCH 62/77] IO::Buffer.map seems not shareable across processes on OpenBSD * See https://rubyci.s3.amazonaws.com/openbsd-current/ruby-master/log/20260129T163005Z.fail.html.gz --- spec/ruby/core/io/buffer/map_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/ruby/core/io/buffer/map_spec.rb 
b/spec/ruby/core/io/buffer/map_spec.rb index d60036307f8774..4543c2d022a7ce 100644 --- a/spec/ruby/core/io/buffer/map_spec.rb +++ b/spec/ruby/core/io/buffer/map_spec.rb @@ -73,7 +73,7 @@ def open_big_file_fixture @buffer.should.valid? end - platform_is_not :windows do + platform_is_not :windows, :openbsd do it "is shareable across processes" do file_name = tmp("shared_buffer") @file = File.open(file_name, "w+") From acc414572c213f2690eee56bac3d2355a24e5930 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Thu, 29 Jan 2026 14:24:38 -0500 Subject: [PATCH 63/77] ZJIT: Support inlining send-with-block (#15998) Autosplat only happens due to `yield` or `.call`, neither of which is permitted in our trivial inliner. --- zjit/src/hir.rs | 6 +- zjit/src/hir/opt_tests.rs | 119 +++++++++++++++++++++++++++++++++++++- 2 files changed, 122 insertions(+), 3 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 24d04f59663291..3c96f1d3aa3c31 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3643,8 +3643,10 @@ impl Function { assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { match self.find(insn_id) { - // Reject block ISEQs to avoid autosplat and other block parameter complications. - Insn::SendDirect { recv, iseq, cd, args, state, blockiseq: None, .. } => { + // We can inline SendDirect with blockiseq because we are prohibiting `yield` + // and `.call`, which would trigger autosplat. We only inline constants and + // variables and builtin calls. + Insn::SendDirect { recv, iseq, cd, args, state, .. 
} => { let call_info = unsafe { (*cd).ci }; let ci_flags = unsafe { vm_ci_flag(call_info) }; // .send call is not currently supported for builtins diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 06bf561d166d62..a8a1a617a3859d 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -2930,7 +2930,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Object@0x1000) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v31:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v8, HeapObject[class_exact*:Object@VALUE(0x1000)] - v32:BasicObject = SendDirect v31, 0x1038, :foo (0x1048) + IncrCounter inline_iseq_optimized_send_count v20:BasicObject = GetLocal :a, l0, EP@3 v24:BasicObject = GetLocal :a, l0, EP@3 CheckInterrupts @@ -9545,6 +9545,123 @@ mod hir_opt_tests { "); } + #[test] + fn test_inline_send_with_block_with_no_params() { + eval(r#" + def callee = 123 + def test + callee do + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_inline_send_with_block_with_one_param() { + eval(r#" + def callee = 123 + def test + callee do |_| + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, 
callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_inline_send_with_block_with_multiple_params() { + eval(r#" + def callee = 123 + def test + callee do |_a, _b| + end + end + test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint NoSingletonClass(Object@0x1000) + PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) + v18:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_iseq_optimized_send_count + v21:Fixnum[123] = Const Value(123) + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_no_inline_send_with_symbol_block() { + eval(r#" + def callee = 123 + public def the_block = 456 + def test + callee(&:the_block) + end + puts test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:5: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:StaticSymbol[:the_block] = Const Value(VALUE(0x1000)) + v13:BasicObject = Send v6, 0x1008, :callee, v11 # SendFallbackReason: Uncategorized(send) + CheckInterrupts + Return v13 + "); + } + #[test] fn test_optimize_stringexact_eq_stringexact() { eval(r#" From cfa3a4a7d1e9552e9105b4e9ae13b349a5c78b20 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Thu, 29 Jan 2026 14:59:33 -0500 Subject: [PATCH 64/77] ZJIT: Make printout and identifier name consistent for `Guard{AnyBit,NoBits}Set` --- zjit/src/hir.rs | 4 ++-- zjit/src/hir/opt_tests.rs | 22 +++++++++++----------- zjit/src/hir/tests.rs | 12 ++++++------ 3 
files changed, 19 insertions(+), 19 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 3c96f1d3aa3c31..8a9d5a5bb09f90 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1554,8 +1554,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::HasType { val, expected, .. } => { write!(f, "HasType {val}, {}", expected.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, - Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardBitSet {val}, {}", mask.print(self.ptr_map)) }, - Insn::GuardNoBitsSet { val, mask, .. } => { write!(f, "GuardBitNotSet {val}, {}", mask.print(self.ptr_map)) }, + Insn::GuardAnyBitSet { val, mask, .. } => { write!(f, "GuardAnyBitSet {val}, {}", mask.print(self.ptr_map)) }, + Insn::GuardNoBitsSet { val, mask, .. } => { write!(f, "GuardNoBitsSet {val}, {}", mask.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, Insn::GuardNotFrozen { recv, .. } => write!(f, "GuardNotFrozen {recv}"), Insn::GuardNotShared { recv, .. 
} => write!(f, "GuardNotShared {recv}"), diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index a8a1a617a3859d..29b1e36331b02d 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3875,7 +3875,7 @@ mod hir_opt_tests { eval(" def test(&block) = tap(&block) "); - assert_snapshot!(hir_string("test"), @" + assert_snapshot!(hir_string("test"), @r" fn test@:2: bb0(): EntryPoint interpreter @@ -3888,9 +3888,9 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:BasicObject): v14:CPtr = GetEP 0 v15:CInt64 = LoadField v14, :_env_data_index_flags@0x1000 - v16:CInt64 = GuardBitNotSet v15, CUInt64(512) + v16:CInt64 = GuardNoBitsSet v15, CUInt64(512) v17:CInt64 = LoadField v14, :_env_data_index_specval@0x1001 - v18:CInt64 = GuardBitSet v17, CUInt64(1) + v18:CInt64 = GuardAnyBitSet v17, CUInt64(1) v19:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) v21:BasicObject = Send v8, 0x1010, :tap, v19 # SendFallbackReason: Uncategorized(send) CheckInterrupts @@ -6546,7 +6546,7 @@ mod hir_opt_tests { def test(&block) = [].map(&block) test { |x| x }; test { |x| x } "#); - assert_snapshot!(hir_string("test"), @" + assert_snapshot!(hir_string("test"), @r" fn test@:2: bb0(): EntryPoint interpreter @@ -6560,9 +6560,9 @@ mod hir_opt_tests { v13:ArrayExact = NewArray v15:CPtr = GetEP 0 v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 - v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v17:CInt64 = GuardNoBitsSet v16, CUInt64(512) v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 - v19:CInt64 = GuardBitSet v18, CUInt64(1) + v19:CInt64 = GuardAnyBitSet v18, CUInt64(1) v20:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) IncrCounter complex_arg_pass_caller_blockarg v22:BasicObject = Send v13, 0x1010, :map, v20 # SendFallbackReason: Complex argument passing @@ -6577,7 +6577,7 @@ mod hir_opt_tests { def test(&block) = [].map(&block) test; test "#); - assert_snapshot!(hir_string("test"), @" + assert_snapshot!(hir_string("test"), @r" fn 
test@:2: bb0(): EntryPoint interpreter @@ -6591,7 +6591,7 @@ mod hir_opt_tests { v13:ArrayExact = NewArray v15:CPtr = GetEP 0 v16:CInt64 = LoadField v15, :_env_data_index_flags@0x1000 - v17:CInt64 = GuardBitNotSet v16, CUInt64(512) + v17:CInt64 = GuardNoBitsSet v16, CUInt64(512) v18:CInt64 = LoadField v15, :_env_data_index_specval@0x1001 v19:CInt64[0] = GuardBitEquals v18, CInt64(0) v20:NilClass = Const Value(nil) @@ -6612,7 +6612,7 @@ mod hir_opt_tests { end test; test "#); - assert_snapshot!(hir_string_proc("test"), @" + assert_snapshot!(hir_string_proc("test"), @r" fn block in test@:4: bb0(): EntryPoint interpreter @@ -6625,9 +6625,9 @@ mod hir_opt_tests { v10:ArrayExact = NewArray v12:CPtr = GetEP 1 v13:CInt64 = LoadField v12, :_env_data_index_flags@0x1000 - v14:CInt64 = GuardBitNotSet v13, CUInt64(512) + v14:CInt64 = GuardNoBitsSet v13, CUInt64(512) v15:CInt64 = LoadField v12, :_env_data_index_specval@0x1001 - v16:CInt64 = GuardBitSet v15, CUInt64(1) + v16:CInt64 = GuardAnyBitSet v15, CUInt64(1) v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) IncrCounter complex_arg_pass_caller_blockarg v19:BasicObject = Send v10, 0x1010, :map, v17 # SendFallbackReason: Complex argument passing diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index dbab964976f91a..5b97a61d80dd0d 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2038,7 +2038,7 @@ pub mod hir_build_tests { eval(" def test(a, ...) = foo(a, ...) 
"); - assert_snapshot!(hir_string("test"), @" + assert_snapshot!(hir_string("test"), @r" fn test@:2: bb0(): EntryPoint interpreter @@ -2058,9 +2058,9 @@ pub mod hir_build_tests { PatchPoint NoEPEscape(test) v33:CPtr = GetEP 0 v34:CInt64 = LoadField v33, :_env_data_index_flags@0x1000 - v35:CInt64 = GuardBitNotSet v34, CUInt64(512) + v35:CInt64 = GuardNoBitsSet v34, CUInt64(512) v36:CInt64 = LoadField v33, :_env_data_index_specval@0x1001 - v37:CInt64 = GuardBitSet v36, CUInt64(1) + v37:CInt64 = GuardAnyBitSet v36, CUInt64(1) v38:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) SideExit UnhandledYARVInsn(splatkw) "); @@ -3413,7 +3413,7 @@ pub mod hir_build_tests { let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("Dir", "open")); assert!(iseq_contains_opcode(iseq, YARVINSN_opt_invokebuiltin_delegate), "iseq Dir.open does not contain invokebuiltin"); let function = iseq_to_hir(iseq).unwrap(); - assert_snapshot!(hir_string_function(&function), @" + assert_snapshot!(hir_string_function(&function), @r" fn open@: bb0(): EntryPoint interpreter @@ -3434,9 +3434,9 @@ pub mod hir_build_tests { PatchPoint NoEPEscape(open) v31:CPtr = GetEP 0 v32:CInt64 = LoadField v31, :_env_data_index_flags@0x1000 - v33:CInt64 = GuardBitNotSet v32, CUInt64(512) + v33:CInt64 = GuardNoBitsSet v32, CUInt64(512) v34:CInt64 = LoadField v31, :_env_data_index_specval@0x1001 - v35:CInt64 = GuardBitSet v34, CUInt64(1) + v35:CInt64 = GuardAnyBitSet v34, CUInt64(1) v36:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1008)) CheckInterrupts v39:CBool[true] = Test v36 From d9cc3c278b3535a9eefd0e573e72f0cdc3fec1f1 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 29 Jan 2026 12:43:01 -0800 Subject: [PATCH 65/77] ZJIT: Remove PadPatchPoint instructions when lowering to LIR (#15974) Basic blocks in LIR should only ever end in control flow instructions such as jump or return. PadPatchPoint is not control flow, so we should not emit it at the end of blocks when lowering. 
--- zjit/src/backend/arm64/mod.rs | 3 ++- zjit/src/backend/lir.rs | 21 ++++++++++++++++++++- zjit/src/backend/x86_64/mod.rs | 3 ++- zjit/src/codegen.rs | 10 ++++++++-- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index ee15627d898dd9..a1836ea9dfb3a4 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -1752,7 +1752,7 @@ mod tests { asm.cret(val64); asm.frame_teardown(JIT_PRESERVED_REGS); - assert_disasm_snapshot!(lir_string(&mut asm), @r" + assert_disasm_snapshot!(lir_string(&mut asm), @" bb0: # bb0(): foo@/tmp/a.rb:1 FrameSetup 1, x19, x21, x20 @@ -1765,6 +1765,7 @@ mod tests { Je bb0 CRet v0 FrameTeardown x19, x21, x20 + PadPatchPoint "); } diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index f0fcece8a1a7d3..b2ec95a9d4a84c 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1745,11 +1745,25 @@ impl Assembler // Emit instructions with labels, expanding branch parameters let mut insns = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY); - for block in self.sorted_blocks() { + let blocks = self.sorted_blocks(); + let num_blocks = blocks.len(); + + for (block_id, block) in blocks.iter().enumerate() { + // Entry blocks shouldn't ever be preceded by something that can + // stomp on this block. 
+ if !block.is_entry { + insns.push(Insn::PadPatchPoint); + } + // Process each instruction, expanding branch params if needed for insn in &block.insns { self.expand_branch_insn(insn, &mut insns); } + + // Make sure we don't stomp on the next function + if block_id == num_blocks - 1 { + insns.push(Insn::PadPatchPoint); + } } insns } @@ -2211,6 +2225,11 @@ impl Assembler fn compile_exit(asm: &mut Assembler, exit: &SideExit) { let SideExit { pc, stack, locals } = exit; + // Side exit blocks are not part of the CFG at the moment, + // so we need to manually ensure that patchpoints get padded + // so that nobody stomps on us + asm.pad_patch_point(); + asm_comment!(asm, "save cfp->pc"); asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), *pc); diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index b045e0f3a3d04b..d55dce1b9b82e0 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -1173,7 +1173,7 @@ mod tests { asm.cret(val64); asm.frame_teardown(JIT_PRESERVED_REGS); - assert_disasm_snapshot!(lir_string(&mut asm), @r" + assert_disasm_snapshot!(lir_string(&mut asm), @" bb0: # bb0(): foo@/tmp/a.rb:1 FrameSetup 1, r13, rbx, r12 @@ -1186,6 +1186,7 @@ mod tests { Je bb0 CRet v0 FrameTeardown r13, rbx, r12 + PadPatchPoint "); } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 41da154c1ae937..2038be808dc633 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -333,6 +333,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func }; gen_if_false(&mut asm, val_opnd, branch_edge, fall_through_edge); + assert!(asm.current_block().insns.last().unwrap().is_terminator()); + asm.set_current_block(fall_through_target); let label = jit.get_label(&mut asm, fall_through_target, block_id); @@ -356,6 +358,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func }; gen_if_true(&mut asm, val_opnd, branch_edge, fall_through_edge); + 
assert!(asm.current_block().insns.last().unwrap().is_terminator()); + asm.set_current_block(fall_through_target); let label = jit.get_label(&mut asm, fall_through_target, block_id); @@ -368,6 +372,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func args: target.args.iter().map(|insn_id| jit.get_opnd(*insn_id)).collect() }; gen_jump(&mut asm, branch_edge); + assert!(asm.current_block().insns.last().unwrap().is_terminator()); + }, _ => { if let Err(last_snapshot) = gen_insn(cb, &mut jit, &mut asm, function, insn_id, &insn) { @@ -382,8 +388,8 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, version: IseqVersionRef, func } } } - // Make sure the last patch point has enough space to insert a jump - asm.pad_patch_point(); + // Blocks should always end with control flow + assert!(asm.current_block().insns.last().unwrap().is_terminator()); } // Generate code if everything can be compiled From 457bb11aa5b2ce4424b611acb489686d130261de Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 26 Jan 2026 21:20:38 -0500 Subject: [PATCH 66/77] [ruby/mmtk] Add weak_references_count to GC.stat https://github.com/ruby/mmtk/commit/86fa2fd4af --- gc/mmtk/mmtk.c | 3 +++ gc/mmtk/mmtk.h | 2 ++ gc/mmtk/src/api.rs | 5 +++++ gc/mmtk/src/weak_proc.rs | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index b8af39cd993d15..d5cfda1be949d9 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -1410,6 +1410,7 @@ enum gc_stat_sym { gc_stat_sym_free_bytes, gc_stat_sym_starting_heap_address, gc_stat_sym_last_heap_address, + gc_stat_sym_weak_references_count, gc_stat_sym_last }; @@ -1428,6 +1429,7 @@ setup_gc_stat_symbols(void) S(free_bytes); S(starting_heap_address); S(last_heap_address); + S(weak_references_count); } } @@ -1463,6 +1465,7 @@ rb_gc_impl_stat(void *objspace_ptr, VALUE hash_or_sym) SET(free_bytes, mmtk_free_bytes()); SET(starting_heap_address, (size_t)mmtk_starting_heap_address()); SET(last_heap_address, 
(size_t)mmtk_last_heap_address()); + SET(weak_references_count, mmtk_weak_references_count()); #undef SET if (!NIL_P(key)) { diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h index 4cef1668a4fc4c..ffbad1a025cce0 100644 --- a/gc/mmtk/mmtk.h +++ b/gc/mmtk/mmtk.h @@ -129,6 +129,8 @@ void mmtk_declare_weak_references(MMTk_ObjectReference object); bool mmtk_weak_references_alive_p(MMTk_ObjectReference object); +size_t mmtk_weak_references_count(void); + void mmtk_register_pinning_obj(MMTk_ObjectReference obj); void mmtk_object_reference_write_post(MMTk_Mutator *mutator, MMTk_ObjectReference object); diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index 3515a2408b3714..5eac068672b549 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -317,6 +317,11 @@ pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool object.is_reachable() } +#[no_mangle] +pub extern "C" fn mmtk_weak_references_count() -> usize { + binding().weak_proc.weak_references_count() +} + // =============== Compaction =============== #[no_mangle] diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs index d0a54f01bf6e81..f103822b737272 100644 --- a/gc/mmtk/src/weak_proc.rs +++ b/gc/mmtk/src/weak_proc.rs @@ -92,6 +92,10 @@ impl WeakProcessor { weak_references.push(object); } + pub fn weak_references_count(&self) -> usize { + self.weak_references.lock().unwrap().len() + } + pub fn process_weak_stuff( &self, worker: &mut GCWorker, From 91619f0230c0e5a95c796c1bd4f784c151e15614 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 28 Jan 2026 13:39:33 +0100 Subject: [PATCH 67/77] gc.c: Verify provided size in `rb_gc_impl_free` For now the provided size is just for GC statistics, but in the future we may want to forward it to C23's `free_sized` and passing an incorrect size to it is undefined behavior. 
--- array.c | 5 +++-- ext/-test-/string/cstr.c | 3 ++- gc/default/default.c | 20 ++++++++++++++++++++ imemo.c | 14 +++++++------- internal/gc.h | 32 -------------------------------- internal/imemo.h | 2 +- prism_compile.c | 2 +- set.c | 4 ++-- string.c | 6 +++--- vm_eval.c | 2 +- 10 files changed, 40 insertions(+), 50 deletions(-) diff --git a/array.c b/array.c index 4496dde2626500..4a4c44562d28bf 100644 --- a/array.c +++ b/array.c @@ -387,13 +387,14 @@ rb_ary_make_embedded(VALUE ary) if (!ARY_EMBED_P(ary)) { const VALUE *buf = ARY_HEAP_PTR(ary); long len = ARY_HEAP_LEN(ary); + long capa = ARY_HEAP_CAPA(ary); FL_SET_EMBED(ary); ARY_SET_EMBED_LEN(ary, len); MEMCPY((void *)ARY_EMBED_PTR(ary), (void *)buf, VALUE, len); - ary_heap_free_ptr(ary, buf, len * sizeof(VALUE)); + ary_heap_free_ptr(ary, buf, capa * sizeof(VALUE)); } } @@ -428,7 +429,7 @@ ary_resize_capa(VALUE ary, long capacity) if (len > capacity) len = capacity; MEMCPY((VALUE *)RARRAY(ary)->as.ary, ptr, VALUE, len); - ary_heap_free_ptr(ary, ptr, old_capa); + ary_heap_free_ptr(ary, ptr, old_capa * sizeof(VALUE)); FL_SET_EMBED(ary); ARY_SET_LEN(ary, len); diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c index b0b1ef5374a46b..931220b46bdc40 100644 --- a/ext/-test-/string/cstr.c +++ b/ext/-test-/string/cstr.c @@ -111,9 +111,10 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str) FL_SET((str2), STR_NOEMBED); memcpy(buf, RSTRING_PTR(str), capacity); RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6); - RSTRING(str2)->as.heap.aux.capa = capacity; + RSTRING(str2)->as.heap.aux.capa = RSTRING_LEN(str); RSTRING(str2)->as.heap.ptr = buf; RSTRING(str2)->len = RSTRING_LEN(str); + TERM_FILL(RSTRING_END(str2), TERM_LEN(str)); return str2; } diff --git a/gc/default/default.c b/gc/default/default.c index 5758fe188555d2..0920ccb11f4d4e 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -301,9 +301,24 @@ int ruby_rgengc_debug; #ifndef GC_ENABLE_LAZY_SWEEP # define GC_ENABLE_LAZY_SWEEP 1 #endif + 
+#ifndef VERIFY_FREE_SIZE +#if RUBY_DEBUG +#define VERIFY_FREE_SIZE 1 +#else +#define VERIFY_FREE_SIZE 0 +#endif +#endif + +#if VERIFY_FREE_SIZE +#undef CALC_EXACT_MALLOC_SIZE +#define CALC_EXACT_MALLOC_SIZE 1 +#endif + #ifndef CALC_EXACT_MALLOC_SIZE # define CALC_EXACT_MALLOC_SIZE 0 #endif + #if defined(HAVE_MALLOC_USABLE_SIZE) || CALC_EXACT_MALLOC_SIZE > 0 # ifndef MALLOC_ALLOCATED_SIZE # define MALLOC_ALLOCATED_SIZE 0 @@ -8255,6 +8270,11 @@ rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size) } #if CALC_EXACT_MALLOC_SIZE struct malloc_obj_info *info = (struct malloc_obj_info *)ptr - 1; +#if VERIFY_FREE_SIZE + if (old_size && (old_size + sizeof(struct malloc_obj_info)) != info->size) { + rb_bug("buffer %p freed with size %lu, but was allocated with size %lu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); + } +#endif ptr = info; old_size = info->size; #endif diff --git a/imemo.c b/imemo.c index d949466a776f3d..0f7c260eb92cdd 100644 --- a/imemo.c +++ b/imemo.c @@ -57,7 +57,7 @@ rb_imemo_tmpbuf_new(void) rb_gc_register_pinning_obj((VALUE)obj); obj->ptr = NULL; - obj->cnt = 0; + obj->size = 0; return (VALUE)obj; } @@ -71,7 +71,7 @@ rb_alloc_tmp_buffer_with_count(volatile VALUE *store, size_t size, size_t cnt) *store = (VALUE)tmpbuf; void *ptr = ruby_xmalloc(size); tmpbuf->ptr = ptr; - tmpbuf->cnt = cnt; + tmpbuf->size = size; return ptr; } @@ -94,9 +94,9 @@ rb_free_tmp_buffer(volatile VALUE *store) rb_imemo_tmpbuf_t *s = (rb_imemo_tmpbuf_t*)ATOMIC_VALUE_EXCHANGE(*store, 0); if (s) { void *ptr = ATOMIC_PTR_EXCHANGE(s->ptr, 0); - long cnt = s->cnt; - s->cnt = 0; - ruby_sized_xfree(ptr, sizeof(VALUE) * cnt); + long size = s->size; + s->size = 0; + ruby_sized_xfree(ptr, size); } } @@ -261,7 +261,7 @@ rb_imemo_memsize(VALUE obj) case imemo_throw_data: break; case imemo_tmpbuf: - size += ((rb_imemo_tmpbuf_t *)obj)->cnt * sizeof(VALUE); + size += ((rb_imemo_tmpbuf_t *)obj)->size; break; case imemo_fields: @@ -506,7 +506,7 @@ 
rb_imemo_mark_and_move(VALUE obj, bool reference_updating) const rb_imemo_tmpbuf_t *m = (const rb_imemo_tmpbuf_t *)obj; if (!reference_updating) { - rb_gc_mark_locations(m->ptr, m->ptr + m->cnt); + rb_gc_mark_locations(m->ptr, m->ptr + (m->size / sizeof(VALUE))); } break; diff --git a/internal/gc.h b/internal/gc.h index ee1f390e104cff..427b2f4553afc0 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -292,36 +292,6 @@ void rb_gc_writebarrier_remember(VALUE obj); const char *rb_obj_info(VALUE obj); void ruby_annotate_mmap(const void *addr, unsigned long size, const char *name); -#if defined(HAVE_MALLOC_USABLE_SIZE) || defined(HAVE_MALLOC_SIZE) || defined(_WIN32) - -static inline void * -ruby_sized_xrealloc_inlined(void *ptr, size_t new_size, size_t old_size) -{ - return ruby_xrealloc(ptr, new_size); -} - -static inline void * -ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, size_t elemsiz, size_t old_count) -{ - return ruby_xrealloc2(ptr, new_count, elemsiz); -} - -static inline void -ruby_sized_xfree_inlined(void *ptr, size_t size) -{ - ruby_xfree(ptr); -} - -# define SIZED_REALLOC_N(x, y, z, w) REALLOC_N(x, y, z) - -static inline void * -ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t old_count) -{ - return ruby_xrealloc2(ptr, new_count, element_size); -} - -#else - static inline void * ruby_sized_xrealloc_inlined(void *ptr, size_t new_size, size_t old_size) { @@ -349,8 +319,6 @@ ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t ol return ruby_sized_xrealloc2(ptr, new_count, element_size, old_count); } -#endif /* HAVE_MALLOC_USABLE_SIZE */ - #define ruby_sized_xrealloc ruby_sized_xrealloc_inlined #define ruby_sized_xrealloc2 ruby_sized_xrealloc2_inlined #define ruby_sized_xfree ruby_sized_xfree_inlined diff --git a/internal/imemo.h b/internal/imemo.h index 31cc0be35ae9c3..6534cec5d7ca9f 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -94,7 +94,7 @@ struct vm_ifunc { struct 
rb_imemo_tmpbuf_struct { VALUE flags; VALUE *ptr; /* malloc'ed buffer */ - size_t cnt; /* buffer size in VALUE */ + size_t size; /* buffer size in bytes */ }; /*! MEMO diff --git a/prism_compile.c b/prism_compile.c index f3aae95487c869..771db13f8909e7 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -11330,7 +11330,7 @@ pm_read_file(pm_string_t *string, const char *filepath) } size_t length = (size_t) len; - uint8_t *source = malloc(length); + uint8_t *source = malloc(length); // FIXME: using raw malloc because that's what Prism uses. memcpy(source, RSTRING_PTR(contents), length); *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length }; diff --git a/set.c b/set.c index 484439a40a6ab7..0fcfb1ef14748d 100644 --- a/set.c +++ b/set.c @@ -141,7 +141,7 @@ set_mark(void *ptr) static void set_free_embedded(struct set_object *sobj) { - free((&sobj->table)->entries); + xfree((&sobj->table)->entries); } static void @@ -1187,7 +1187,7 @@ set_reset_table_with_type(VALUE set, const struct st_hash_type *type) set_iter(set, set_merge_i, (st_data_t)&args); set_free_embedded(sobj); memcpy(&sobj->table, new, sizeof(*new)); - free(new); + xfree(new); } else { sobj->table.type = type; diff --git a/string.c b/string.c index a36eb6e9f381c0..3cfc77600b6f1b 100644 --- a/string.c +++ b/string.c @@ -1559,7 +1559,7 @@ rb_str_tmp_frozen_no_embed_acquire(VALUE orig) } RSTRING(str)->len = RSTRING(orig)->len; - RSTRING(str)->as.heap.aux.capa = capa; + RSTRING(str)->as.heap.aux.capa = capa + (TERM_LEN(orig) - TERM_LEN(str)); return str; } @@ -3135,7 +3135,7 @@ str_subseq(VALUE str, long beg, long len) const int termlen = TERM_LEN(str); if (!SHARABLE_SUBSTRING_P(beg, len, RSTRING_LEN(str))) { - str2 = rb_str_new(RSTRING_PTR(str) + beg, len); + str2 = rb_enc_str_new(RSTRING_PTR(str) + beg, len, rb_str_enc_get(str)); RB_GC_GUARD(str); return str2; } @@ -7814,7 +7814,7 @@ mapping_buffer_free(void *p) while (current_buffer) { previous_buffer = current_buffer; 
current_buffer = current_buffer->next; - ruby_sized_xfree(previous_buffer, previous_buffer->capa); + ruby_sized_xfree(previous_buffer, offsetof(mapping_buffer, space) + previous_buffer->capa); } } diff --git a/vm_eval.c b/vm_eval.c index 652fc4d85feac2..cf01b4a62b37da 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -1776,7 +1776,7 @@ pm_eval_make_iseq(VALUE src, VALUE fname, int line, /* We need to duplicate the string because the Ruby string may * be embedded so compaction could move the string and the pointer * will change. */ - char *name_dup = xmalloc(length + 1); + char *name_dup = malloc(length + 1); // FIXME: using raw `malloc` because that is what Prism uses. strlcpy(name_dup, name, length + 1); RB_GC_GUARD(name_obj); From 5fec5456b9cd9dd7fdea18ac9c43b1cf6d4cf4cb Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Thu, 29 Jan 2026 18:24:50 -0500 Subject: [PATCH 68/77] ZJIT: Optimize `super` calls to C function targets (#15993) This PR is an extension of the work in #15816. There, we optimized `super` calls where the target method was an ISeq. The code bailed on any other `super` target method type. The discussion for that PR included the ZJIT stats from running the _railsbench_ benchmark in _ruby-bench_. The stats showed the other types of `super` calls we encountered that we didn't process: ``` Top-2 not optimized method types for super (100.0% of total 2,700,015): cfunc: 2,680,044 (99.3%) attrset: 19,971 ( 0.7%) ``` This PR handles most of the cfunc cases. We still only handle simple method signatures and don't handle blocks at all, but if the target function is a cfunc where `argc != -2`, we now optimize to either `Insn::CCallWithFrame` or `Insn::CCallVariadic` as appropriate. This covers 100% of the C func cases we encounter in _railsbench_.
Baseline ZJIT stats

``` Top-20 not inlined C methods (51.1% of total 15,736,824): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,899 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,821 ( 2.6%) Array#any?: 403,598 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,574 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,701 ( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (75.3% of total 106,711,108): rb_vm_opt_send_without_block: 22,031,658 (20.6%) rb_hash_aref: 9,335,540 ( 8.7%) rb_vm_env_write: 7,865,750 ( 7.4%) rb_vm_send: 6,836,936 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,383 ( 5.3%) rb_vm_getinstancevariable: 5,012,846 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_vm_invokesuper: 3,240,208 ( 3.0%) rb_hash_aset: 2,061,526 ( 1.9%) rb_obj_is_kind_of: 1,812,573 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,707 ( 1.1%) rb_hash_new_with_size: 1,150,766 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,781 ( 1.0%) rb_obj_alloc: 993,445 ( 0.9%) rb_str_concat_literals: 984,558 ( 0.9%) Regexp#match?: 970,899 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-2 not optimized method types for super (100.0% of total 2,680,495): cfunc: 2,660,334 (99.2%) attrset: 20,161 ( 0.8%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 
1,647,238 (62.9%) sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 34,703,584): send_without_block_polymorphic: 12,818,893 (36.9%) send_without_block_no_profiles: 5,442,960 (15.7%) send_not_optimized_method_type: 3,423,067 ( 9.9%) super_not_optimized_method_type: 2,680,495 ( 7.7%) uncategorized: 2,617,553 ( 7.5%) send_no_profiles: 2,083,822 ( 6.0%) one_or_more_complex_arg_pass: 1,663,149 ( 4.8%) send_polymorphic: 1,329,141 ( 3.8%) send_without_block_not_optimized_need_permission: 510,815 ( 1.5%) too_many_args_for_lir: 477,266 ( 1.4%) singleton_class_seen: 441,058 ( 1.3%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.7%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) super_polymorphic: 22,087 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,871): not_monomorphic: 5,012,871 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,581): param_forwardable: 729,353 (35.3%) param_block: 716,533 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) 
param_kwrest: 80,149 ( 3.9%) caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,647): throw: 93,060 (72.3%) invokebuiltin: 35,587 (27.7%) Top-19 side exit reasons (100.0% of total 3,484,374): guard_shape_failure: 1,042,511 (29.9%) guard_type_failure: 812,342 (23.3%) block_param_proxy_not_iseq_or_ifunc: 795,628 (22.8%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,647 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,619 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 570 ( 0.0%) obj_to_string_fallback: 498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 43 ( 0.0%) guard_super_method_entry: 8 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,679,714 dynamic_send_count: 34,703,584 (26.0%) optimized_send_count: 98,976,130 (74.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,871 ( 3.7%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,085,055 (28.5%) inline_cfunc_optimized_send_count: 39,628,908 (29.6%) inline_iseq_optimized_send_count: 3,624,852 ( 2.7%) non_variadic_cfunc_optimized_send_count: 10,434,756 ( 7.8%) variadic_cfunc_optimized_send_count: 7,202,559 ( 5.4%) compiled_iseq_count: 2,868 failed_iseq_count: 0 compile_time: 8,809ms profile_time: 135ms gc_time: 255ms invalidation_time: 21ms vm_write_pc_count: 116,809,164 vm_write_sp_count: 116,809,164 vm_write_locals_count: 111,533,227 vm_write_stack_count: 111,533,227 
vm_write_to_parent_iseq_local_count: 521,277 vm_read_from_parent_iseq_local_count: 12,757,231 guard_type_count: 126,653,751 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,824 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,336,000 zjit_alloc_bytes: 19,282,889 total_mem_bytes: 33,618,889 side_exit_count: 3,484,374 total_insn_count: 697,672,179 vm_insn_count: 52,531,010 zjit_insn_count: 645,141,169 ratio_in_zjit: 92.5% ```

Optimized invokesuper stats

``` Top-20 not inlined C methods (51.1% of total 15,736,852): Hash#key?: 1,260,867 ( 8.0%) Regexp#match?: 970,900 ( 6.2%) Hash#fetch: 898,248 ( 5.7%) Integer#===: 439,075 ( 2.8%) Hash#delete: 405,825 ( 2.6%) Array#any?: 403,600 ( 2.6%) String.new: 401,818 ( 2.6%) String#b: 319,473 ( 2.0%) String#to_sym: 272,868 ( 1.7%) Array#all?: 260,132 ( 1.7%) Fiber.current: 259,588 ( 1.6%) Array#join: 257,125 ( 1.6%) Array#include?: 247,718 ( 1.6%) Kernel#Array: 244,579 ( 1.6%) String#<<: 242,475 ( 1.5%) Symbol#end_with?: 239,977 ( 1.5%) String#force_encoding: 239,520 ( 1.5%) Kernel#dup: 232,706 ( 1.5%) Array#[]: 225,160 ( 1.4%) Kernel#respond_to?: 220,246 ( 1.4%) Top-20 calls to C functions from JIT code (73.2% of total 106,690,862): rb_vm_opt_send_without_block: 22,031,722 (20.7%) rb_hash_aref: 9,335,543 ( 8.8%) rb_vm_env_write: 7,865,751 ( 7.4%) rb_vm_send: 6,836,939 ( 6.4%) rb_zjit_writebarrier_check_immediate: 5,623,259 ( 5.3%) rb_vm_getinstancevariable: 5,012,844 ( 4.7%) rb_ivar_get_at_no_ractor_check: 4,868,219 ( 4.6%) rb_hash_aset: 2,061,385 ( 1.9%) rb_obj_is_kind_of: 1,812,575 ( 1.7%) rb_vm_invokeblock: 1,647,238 ( 1.5%) rb_vm_opt_getconstant_path: 1,295,958 ( 1.2%) Hash#key?: 1,260,867 ( 1.2%) rb_class_allocate_instance: 1,190,704 ( 1.1%) rb_hash_new_with_size: 1,150,765 ( 1.1%) rb_vm_setinstancevariable: 1,119,304 ( 1.0%) rb_ec_ary_new_from_values: 1,050,780 ( 1.0%) rb_obj_alloc: 993,446 ( 0.9%) rb_str_concat_literals: 984,559 ( 0.9%) Regexp#match?: 970,900 ( 0.9%) rb_obj_as_string_result: 937,751 ( 0.9%) Top-2 not optimized method types for send (100.0% of total 3,423,067): iseq: 3,410,096 (99.6%) optimized: 12,971 ( 0.4%) Top-2 not optimized method types for send_without_block (100.0% of total 319,311): optimized_send: 246,250 (77.1%) null: 73,061 (22.9%) Top-1 not optimized method types for super (100.0% of total 20,161): attrset: 20,161 (100.0%) Top-4 instructions with uncategorized fallback reason (100.0% of total 2,617,553): invokeblock: 1,647,238 (62.9%) 
sendforward: 748,101 (28.6%) invokesuperforward: 199,443 ( 7.6%) opt_send_without_block: 22,771 ( 0.9%) Top-20 send fallback reasons (100.0% of total 32,043,318): send_without_block_polymorphic: 12,818,949 (40.0%) send_without_block_no_profiles: 5,442,967 (17.0%) send_not_optimized_method_type: 3,423,067 (10.7%) uncategorized: 2,617,553 ( 8.2%) send_no_profiles: 2,083,824 ( 6.5%) one_or_more_complex_arg_pass: 1,663,150 ( 5.2%) send_polymorphic: 1,329,142 ( 4.1%) send_without_block_not_optimized_need_permission: 510,814 ( 1.6%) too_many_args_for_lir: 477,267 ( 1.5%) singleton_class_seen: 441,058 ( 1.4%) super_complex_args_pass: 331,767 ( 1.0%) send_without_block_not_optimized_method_type_optimized: 246,250 ( 0.8%) send_without_block_megamorphic: 228,672 ( 0.7%) super_target_complex_args_pass: 165,855 ( 0.5%) send_without_block_not_optimized_method_type: 73,061 ( 0.2%) obj_to_string_not_string: 67,862 ( 0.2%) super_call_with_block: 40,004 ( 0.1%) send_without_block_direct_keyword_mismatch: 39,783 ( 0.1%) super_polymorphic: 22,088 ( 0.1%) super_not_optimized_method_type: 20,161 ( 0.1%) Top-3 setivar fallback reasons (100.0% of total 1,119,304): not_monomorphic: 1,077,792 (96.3%) not_t_object: 41,335 ( 3.7%) new_shape_needs_extension: 177 ( 0.0%) Top-1 getivar fallback reasons (100.0% of total 5,012,869): not_monomorphic: 5,012,869 (100.0%) Top-2 definedivar fallback reasons (100.0% of total 142,798): not_monomorphic: 142,711 (99.9%) not_t_object: 87 ( 0.1%) Top-6 invokeblock handler (100.0% of total 1,647,238): monomorphic_iseq: 878,253 (53.3%) polymorphic: 483,612 (29.4%) monomorphic_other: 134,943 ( 8.2%) monomorphic_ifunc: 115,175 ( 7.0%) megamorphic: 34,939 ( 2.1%) no_profiles: 316 ( 0.0%) Top-8 popular complex argument-parameter features not optimized (100.0% of total 2,068,582): param_forwardable: 729,353 (35.3%) param_block: 716,534 (34.6%) param_rest: 327,865 (15.8%) caller_splat: 114,365 ( 5.5%) caller_kw_splat: 99,266 ( 4.8%) param_kwrest: 80,149 ( 3.9%) 
caller_blockarg: 877 ( 0.0%) caller_kwarg: 173 ( 0.0%) Top-1 compile error reasons (100.0% of total 156,707): exception_handler: 156,707 (100.0%) Top-5 unhandled YARV insns (100.0% of total 201,517): getconstant: 160,920 (79.9%) expandarray: 19,985 ( 9.9%) setblockparam: 19,972 ( 9.9%) checkmatch: 480 ( 0.2%) once: 160 ( 0.1%) Top-2 unhandled HIR insns (100.0% of total 128,646): throw: 93,060 (72.3%) invokebuiltin: 35,586 (27.7%) Top-19 side exit reasons (100.0% of total 3,504,293): guard_shape_failure: 1,042,515 (29.7%) guard_type_failure: 812,249 (23.2%) block_param_proxy_not_iseq_or_ifunc: 795,628 (22.7%) unhandled_yarv_insn: 201,517 ( 5.8%) compile_error: 156,707 ( 4.5%) unhandled_hir_insn: 128,646 ( 3.7%) unhandled_newarray_send_pack: 119,187 ( 3.4%) patchpoint_method_redefined: 80,779 ( 2.3%) unhandled_block_arg: 60,517 ( 1.7%) block_param_proxy_modified: 49,695 ( 1.4%) guard_less_failure: 20,033 ( 0.6%) guard_super_method_entry: 19,855 ( 0.6%) fixnum_lshift_overflow: 9,985 ( 0.3%) patchpoint_stable_constant_names: 5,752 ( 0.2%) fixnum_mult_overflow: 569 ( 0.0%) obj_to_string_fallback: 498 ( 0.0%) patchpoint_no_ep_escape: 109 ( 0.0%) interrupt: 46 ( 0.0%) guard_greater_eq_failure: 6 ( 0.0%) send_count: 133,600,402 dynamic_send_count: 32,043,318 (24.0%) optimized_send_count: 101,557,084 (76.0%) dynamic_setivar_count: 1,119,304 ( 0.8%) dynamic_getivar_count: 5,012,869 ( 3.8%) dynamic_definedivar_count: 142,798 ( 0.1%) iseq_optimized_send_count: 38,025,870 (28.5%) inline_cfunc_optimized_send_count: 39,628,762 (29.7%) inline_iseq_optimized_send_count: 3,624,854 ( 2.7%) non_variadic_cfunc_optimized_send_count: 12,631,917 ( 9.5%) variadic_cfunc_optimized_send_count: 7,645,681 ( 5.7%) compiled_iseq_count: 2,870 failed_iseq_count: 0 compile_time: 8,419ms profile_time: 133ms gc_time: 248ms invalidation_time: 20ms vm_write_pc_count: 116,729,857 vm_write_sp_count: 116,729,857 vm_write_locals_count: 111,453,921 vm_write_stack_count: 111,453,921 
vm_write_to_parent_iseq_local_count: 521,275 vm_read_from_parent_iseq_local_count: 12,757,225 guard_type_count: 126,594,209 guard_type_exit_ratio: 0.6% guard_shape_count: 44,193,683 guard_shape_exit_ratio: 2.4% code_region_bytes: 14,368,768 zjit_alloc_bytes: 19,581,578 total_mem_bytes: 33,950,346 side_exit_count: 3,504,293 total_insn_count: 697,692,070 vm_insn_count: 52,828,675 zjit_insn_count: 644,863,395 ratio_in_zjit: 92.4% ```

--- test/ruby/test_zjit.rb | 62 ++++++++++- zjit/src/hir.rs | 209 ++++++++++++++++++++++++++------------ zjit/src/hir/opt_tests.rs | 67 ++++++++++-- 3 files changed, 260 insertions(+), 78 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 6ad06f9453e9d3..7b068e9898fc80 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1096,16 +1096,68 @@ def test }, call_threshold: 2 end - def test_invokesuper_to_cfunc - assert_compiles '["MyArray", 3]', %q{ - class MyArray < Array + def test_invokesuper_to_cfunc_no_args + assert_compiles '["MyString", 3]', %q{ + class MyString < String def length - ["MyArray", super] + ["MyString", super] end end def test - MyArray.new([1, 2, 3]).length + MyString.new("abc").length + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_simple_args + assert_compiles '["MyString", true]', %q{ + class MyString < String + def include?(other) + ["MyString", super(other)] + end + end + + def test + MyString.new("abc").include?("bc") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + + def test_invokesuper_to_cfunc_with_optional_arg + assert_compiles '["MyString", 6]', %q{ + class MyString < String + def byteindex(needle, offset = 0) + ["MyString", super(needle, offset)] + end + end + + def test + MyString.new("hello world").byteindex("world") + end + + test # profile invokesuper + test # compile + run compiled code + }, call_threshold: 2 + end + + def test_invokesuper_to_cfunc_varargs + assert_compiles '["MyString", true]', %q{ + class MyString < String + def end_with?(str) + ["MyString", super(str)] + end + end + + def test + MyString.new("abc").end_with?("bc") end test # profile invokesuper diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 8a9d5a5bb09f90..901beffea02772 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1297,6 +1297,20 @@ fn 
get_local_var_name_for_printer(iseq: Option, level: u32, ep_offset: Some(format!(":{}", id.contents_lossy())) } +/// Construct a qualified method name for display/debug output. +/// Returns strings like "Array#length" for instance methods or "Foo.bar" for singleton methods. +fn qualified_method_name(class: VALUE, method_id: ID) -> String { + let method_name = method_id.contents_lossy(); + // rb_zjit_singleton_class_p also checks if it's a class + if unsafe { rb_zjit_singleton_class_p(class) } { + let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); + format!("{class_name}.{method_name}") + } else { + let class_name = get_class_name(class); + format!("{class_name}#{method_name}") + } +} + static REGEXP_FLAGS: &[(u32, &str)] = &[ (ONIG_OPTION_MULTILINE, "MULTILINE"), (ONIG_OPTION_IGNORECASE, "IGNORECASE"), @@ -3504,6 +3518,40 @@ impl Function { }; } Insn::InvokeSuper { recv, cd, blockiseq, args, state, .. } => { + // Helper to emit common guards for super call optimization. + fn emit_super_call_guards( + fun: &mut Function, + block: BlockId, + super_cme: *const rb_callable_method_entry_t, + current_cme: *const rb_callable_method_entry_t, + mid: ID, + state: InsnId, + ) { + fun.push_insn(block, Insn::PatchPoint { + invariant: Invariant::MethodRedefined { + klass: unsafe { (*super_cme).defined_class }, + method: mid, + cme: super_cme + }, + state + }); + + let lep = fun.push_insn(block, Insn::GetLEP); + fun.push_insn(block, Insn::GuardSuperMethodEntry { + lep, + cme: current_cme, + state + }); + + let block_handler = fun.push_insn(block, Insn::GetBlockHandler { lep }); + fun.push_insn(block, Insn::GuardBitEquals { + val: block_handler, + expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), + reason: SideExitReason::UnhandledBlockArg, + state + }); + } + // Don't handle calls with literal blocks (e.g., super { ... 
}) if !blockiseq.is_null() { self.push_insn_id(block, insn_id); @@ -3567,68 +3615,107 @@ impl Function { continue; } - // Check if it's an ISEQ method; bail if it isn't. let def_type = unsafe { get_cme_def_type(super_cme) }; - if def_type != VM_METHOD_TYPE_ISEQ { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); - continue; - } - // Check if the super method's parameters support direct send. - // If not, we can't do direct dispatch. - let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; - // TODO: pass Option to can_direct_send when we start specializing super { ... } - if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { - self.push_insn_id(block, insn_id); - self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); - continue; - } + if def_type == VM_METHOD_TYPE_ISEQ { + // Check if the super method's parameters support direct send. + // If not, we can't do direct dispatch. + let super_iseq = unsafe { get_def_iseq_ptr((*super_cme).def) }; + // TODO: pass Option to can_direct_send when we start specializing `super { ... }`. + if !can_direct_send(self, block, super_iseq, ci, insn_id, args.as_slice(), None) { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperTargetComplexArgsPass); + continue; + } - // Add PatchPoint for method redefinition. - self.push_insn(block, Insn::PatchPoint { - invariant: Invariant::MethodRedefined { - klass: unsafe { (*super_cme).defined_class }, - method: mid, - cme: super_cme - }, - state - }); + let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) + .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { + self.push_insn_id(block, insn_id); continue; + }; - // Guard that we're calling `super` from the expected method context. 
- let lep = self.push_insn(block, Insn::GetLEP); - self.push_insn(block, Insn::GuardSuperMethodEntry { - lep, - cme: current_cme, - state - }); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); - // Guard that no block is being passed (implicit or explicit). - let block_handler = self.push_insn(block, Insn::GetBlockHandler { lep }); - self.push_insn(block, Insn::GuardBitEquals { - val: block_handler, - expected: Const::Value(VALUE(VM_BLOCK_HANDLER_NONE as usize)), - reason: SideExitReason::UnhandledBlockArg, - state - }); + // Use SendDirect with the super method's CME and ISEQ. + let send_direct = self.push_insn(block, Insn::SendDirect { + recv, + cd, + cme: super_cme, + iseq: super_iseq, + args: processed_args, + kw_bits, + state: send_state, + blockiseq: None, + }); + self.make_equal_to(insn_id, send_direct); - let Ok((send_state, processed_args, kw_bits)) = self.prepare_direct_send_args(block, &args, ci, super_iseq, state) - .inspect_err(|&reason| self.set_dynamic_send_reason(insn_id, reason)) else { - self.push_insn_id(block, insn_id); continue; - }; + } else if def_type == VM_METHOD_TYPE_CFUNC { + let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); + + match cfunc_argc { + // C function with fixed argument count. + 0.. => { + // Check argc matches + if args.len() != cfunc_argc as usize { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, ArgcParamMismatch); + continue; + } - // Use SendDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendDirect { - recv, - cd, - cme: super_cme, - iseq: super_iseq, - args: processed_args, - kw_bits, - state: send_state, - blockiseq: None, - }); - self.make_equal_to(insn_id, send_direct); + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallWithFrame for the C function. 
+ let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Variadic C function: func(int argc, VALUE *argv, VALUE recv) + -1 => { + emit_super_call_guards(self, block, super_cme, current_cme, mid, state); + + // Use CCallVariadic for the variadic C function. + let name = rust_str_to_id(&qualified_method_name(unsafe { (*super_cme).owner }, unsafe { (*super_cme).called_id })); + let ccall = self.push_insn(block, Insn::CCallVariadic { + cfunc: cfunc_ptr, + recv, + args: args.clone(), + cme: super_cme, + name, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq: None, + }); + self.make_equal_to(insn_id, ccall); + } + + // Array-variadic: (self, args_ruby_array). 
+ -2 => { + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::Cfunc)); + continue; + } + _ => unreachable!("unknown cfunc argc: {}", cfunc_argc) + } + } else { + // Other method types (not ISEQ or CFUNC) + self.push_insn_id(block, insn_id); + self.set_dynamic_send_reason(insn_id, SuperNotOptimizedMethodType(MethodType::from(def_type))); + continue; + } } _ => { self.push_insn_id(block, insn_id); } } @@ -4296,18 +4383,6 @@ impl Function { Err(()) } - fn qualified_method_name(class: VALUE, method_id: ID) -> String { - let method_name = method_id.contents_lossy(); - // rb_zjit_singleton_class_p also checks if it's a class - if unsafe { rb_zjit_singleton_class_p(class) } { - let class_name = get_class_name(unsafe { rb_class_attached_object(class) }); - format!("{class_name}.{method_name}") - } else { - let class_name = get_class_name(class); - format!("{class_name}#{method_name}") - } - } - fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) { let owner = unsafe { (*cme).owner }; let called_id = unsafe { (*cme).called_id }; diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 29b1e36331b02d..de4e2ec39db7a7 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -11406,7 +11406,7 @@ mod hir_opt_tests { } #[test] - fn test_invokesuper_to_cfunc_remains_invokesuper() { + fn test_invokesuper_to_cfunc_optimizes_to_ccall() { eval(" class MyArray < Array def length @@ -11418,10 +11418,10 @@ mod hir_opt_tests { "); let hir = hir_string_proc("MyArray.new.method(:length)"); - assert!(hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); - assert!(!hir.contains("SendDirect"), "Should not optimize to SendDirect for CFUNC:\n{hir}"); + assert!(!hir.contains("InvokeSuper "), "Expected unoptimized InvokeSuper but got:\n{hir}"); + assert!(hir.contains("CCallWithFrame"), "Should optimize to CCallWithFrame 
for non-variadic cfunc:\n{hir}"); - assert_snapshot!(hir, @r" + assert_snapshot!(hir, @" fn length@:4: bb0(): EntryPoint interpreter @@ -11431,9 +11431,64 @@ mod hir_opt_tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): - v11:BasicObject = InvokeSuper v6, 0x1000 # SendFallbackReason: super: unsupported target method type Cfunc + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + v17:CPtr = GetLEP + GuardSuperMethodEntry v17, 0x1038 + v19:RubyValue = GetBlockHandler v17 + v20:FalseClass = GuardBitEquals v19, Value(false) + v21:BasicObject = CCallWithFrame v6, :Array#length@0x1040 CheckInterrupts - Return v11 + Return v21 + "); + } + + #[test] + fn test_invokesuper_to_variadic_cfunc_optimizes_to_ccall() { + eval(" + class MyString < String + def byteindex(needle, offset = 0) + super(needle, offset) + end + end + + MyString.new('hello world').byteindex('world', 0); MyString.new('hello world').byteindex('world', 0) + "); + + let hir = hir_string_proc("MyString.new('hello world').method(:byteindex)"); + assert!(!hir.contains("InvokeSuper "), "InvokeSuper should optimize to CCallVariadic but got:\n{hir}"); + assert!(hir.contains("CCallVariadic"), "Should optimize to CCallVariadic for variadic cfunc:\n{hir}"); + + assert_snapshot!(hir, @" + fn byteindex@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal :needle, l0, SP@5 + v3:BasicObject = GetLocal :offset, l0, SP@4 + v4:CPtr = LoadPC + v5:CPtr[CPtr(0x1000)] = Const CPtr(0x1008) + v6:CBool = IsBitEqual v4, v5 + IfTrue v6, bb2(v1, v2, v3) + Jump bb4(v1, v2, v3) + bb1(v10:BasicObject, v11:BasicObject): + EntryPoint JIT(0) + v12:NilClass = Const Value(nil) + Jump bb2(v10, v11, v12) + bb2(v19:BasicObject, v20:BasicObject, v21:BasicObject): + v24:Fixnum[0] = Const Value(0) + Jump bb4(v19, v20, v24) + bb3(v15:BasicObject, v16:BasicObject, v17:BasicObject): + EntryPoint JIT(1) + Jump bb4(v15, v16, v17) + bb4(v27:BasicObject, v28:BasicObject, 
v29:BasicObject): + PatchPoint MethodRedefined(String@0x1010, byteindex@0x1018, cme:0x1020) + v42:CPtr = GetLEP + GuardSuperMethodEntry v42, 0x1008 + v44:RubyValue = GetBlockHandler v42 + v45:FalseClass = GuardBitEquals v44, Value(false) + v46:BasicObject = CCallVariadic v27, :String#byteindex@0x1048, v28, v29 + CheckInterrupts + Return v46 "); } From ef583c93ebad9eb9cf988e35bfd8ee22fbedd2c0 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Thu, 29 Jan 2026 18:34:30 -0500 Subject: [PATCH 69/77] Fix NEWOBJ hook calling `rb_obj_memsize_of` on TypedData object (#16002) Fix NEWOBJ hook calling cruby functions on objects not filled yet. Objects like `TypedData` need to be zeroed out when calling `rb_obj_memsize_of`. Other object types need `fields_obj` to be 0 when they don't have one, etc. Fixes [Bug #21854] --- gc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gc.c b/gc.c index 935a9f5d4bdaaa..407541b309abb8 100644 --- a/gc.c +++ b/gc.c @@ -1014,9 +1014,7 @@ newobj_of(rb_ractor_t *cr, VALUE klass, VALUE flags, shape_id_t shape_id, bool w int lev = RB_GC_VM_LOCK_NO_BARRIER(); { size_t slot_size = rb_gc_obj_slot_size(obj); - if (slot_size > RVALUE_SIZE) { - memset((char *)obj + RVALUE_SIZE, 0, slot_size - RVALUE_SIZE); - } + memset((char *)obj + sizeof(struct RBasic), 0, slot_size - sizeof(struct RBasic)); /* We must disable GC here because the callback could call xmalloc * which could potentially trigger a GC, and a lot of code is unsafe @@ -1163,17 +1161,19 @@ rb_objspace_data_type_memsize(VALUE obj) { size_t size = 0; if (RTYPEDDATA_P(obj)) { - const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); const void *ptr = RTYPEDDATA_GET_DATA(obj); - if (RTYPEDDATA_EMBEDDABLE_P(obj) && !RTYPEDDATA_EMBEDDED_P(obj)) { + if (ptr) { + const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); + if (RTYPEDDATA_EMBEDDABLE_P(obj) && !RTYPEDDATA_EMBEDDED_P(obj)) { #ifdef HAVE_MALLOC_USABLE_SIZE - size += malloc_usable_size((void *)ptr); + size += 
malloc_usable_size((void *)ptr); #endif - } + } - if (ptr && type->function.dsize) { - size += type->function.dsize(ptr); + if (type->function.dsize) { + size += type->function.dsize(ptr); + } } } From 5068fe85e6f7e662cd6e15f5ed7b80a4be5e6056 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 30 Jan 2026 09:25:42 +0900 Subject: [PATCH 70/77] [ruby/resolv] v0.7.1 https://github.com/ruby/resolv/commit/8fc05c1cb6 --- lib/resolv.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/resolv.rb b/lib/resolv.rb index b6ff3485182e5d..9720b52c00fef3 100644 --- a/lib/resolv.rb +++ b/lib/resolv.rb @@ -35,7 +35,7 @@ class Resolv # The version string - VERSION = "0.7.0" + VERSION = "0.7.1" ## # Looks up the first IP address for +name+. From 604090cb8ececa2495b20a4bac71043db705f1d1 Mon Sep 17 00:00:00 2001 From: git Date: Fri, 30 Jan 2026 00:27:11 +0000 Subject: [PATCH 71/77] Update default gems list at 5068fe85e6f7e662cd6e15f5ed7b80 [ci skip] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 9a1dd9c9337587..49eec73f4b492c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,7 @@ releases. 
* RubyGems 4.1.0.dev * bundler 4.1.0.dev * prism 1.9.0 +* resolv 0.7.1 * stringio 3.2.1.dev * strscan 3.1.7.dev * syntax_suggest 2.0.3 From 0d66488f827a9afe63b2f1011908a7b78556f276 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 29 Jan 2026 17:18:05 -0500 Subject: [PATCH 72/77] Remove dead gc_stat_sym_weak_references_count --- gc/default/default.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 0920ccb11f4d4e..aaf6f56092b360 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -7471,7 +7471,6 @@ enum gc_stat_sym { gc_stat_sym_oldmalloc_increase_bytes, gc_stat_sym_oldmalloc_increase_bytes_limit, #endif - gc_stat_sym_weak_references_count, #if RGENGC_PROFILE gc_stat_sym_total_generated_normal_object_count, gc_stat_sym_total_generated_shady_object_count, @@ -7522,7 +7521,6 @@ setup_gc_stat_symbols(void) S(oldmalloc_increase_bytes); S(oldmalloc_increase_bytes_limit); #endif - S(weak_references_count); #if RGENGC_PROFILE S(total_generated_normal_object_count); S(total_generated_shady_object_count); From 25d91f467378ae4cf9f955975a9d03f57e663565 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:03:09 +0000 Subject: [PATCH 73/77] Bump actions/cache from 5.0.2 to 5.0.3 Bumps [actions/cache](https://github.com/actions/cache) from 5.0.2 to 5.0.3. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/8b402f58fbc84540c8b491a91e594a4576fec3d7...cdf6c1fa76f9f475f3d7449005a359c84ca0f306) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 5.0.3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/windows.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index f997ed56d5f269..f9d1335d464016 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -88,7 +88,7 @@ jobs: - name: Restore vcpkg artifact id: restore-vcpkg - uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + uses: actions/cache/restore@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: src\vcpkg_installed key: windows-${{ matrix.os }}-vcpkg-${{ hashFiles('src/vcpkg.json') }} @@ -100,7 +100,7 @@ jobs: if: ${{ ! steps.restore-vcpkg.outputs.cache-hit }} - name: Save vcpkg artifact - uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + uses: actions/cache/save@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: src\vcpkg_installed key: windows-${{ matrix.os }}-vcpkg-${{ hashFiles('src/vcpkg.json') }} From edc132c399f661a5e94d801cebae2237c55343fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:04:03 +0000 Subject: [PATCH 74/77] Bump actions/cache in /.github/actions/setup/directories Bumps [actions/cache](https://github.com/actions/cache) from 5.0.2 to 5.0.3. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/8b402f58fbc84540c8b491a91e594a4576fec3d7...cdf6c1fa76f9f475f3d7449005a359c84ca0f306) --- updated-dependencies: - dependency-name: actions/cache dependency-version: 5.0.3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/actions/setup/directories/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup/directories/action.yml b/.github/actions/setup/directories/action.yml index 99d1fc0151cc7d..4f71ee592a6607 100644 --- a/.github/actions/setup/directories/action.yml +++ b/.github/actions/setup/directories/action.yml @@ -100,7 +100,7 @@ runs: path: ${{ inputs.srcdir }} fetch-depth: ${{ inputs.fetch-depth }} - - uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2 + - uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ${{ inputs.srcdir }}/.downloaded-cache key: ${{ runner.os }}-${{ runner.arch }}-downloaded-cache From 9bf8aaa26460b056236738a98521149f38611f88 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 27 Jan 2026 10:17:12 +0900 Subject: [PATCH 75/77] [ruby/rubygems] Revert "[ruby/rubygems] Revert "Merge pull request #8989 from nobu/test-tmpdir"" This reverts commit https://github.com/ruby/rubygems/commit/6e00da098aba. https://github.com/ruby/rubygems/commit/c6abdae812 --- spec/bundler/support/path.rb | 2 +- spec/bundler/support/rubygems_ext.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/bundler/support/path.rb b/spec/bundler/support/path.rb index 0a534dd40e44eb..1c49def88df292 100644 --- a/spec/bundler/support/path.rb +++ b/spec/bundler/support/path.rb @@ -114,7 +114,7 @@ def tmp(*path) end def tmp_root - source_root.join("tmp") + ruby_core? && (tmpdir = ENV["TMPDIR"]) ? 
Pathname(tmpdir) : source_root.join("tmp") end # Bump this version whenever you make a breaking change to the spec setup diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index 2d681529aac2ef..f168c7048701b9 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -43,7 +43,7 @@ def test_setup # sign extension bundles on macOS, to avoid trying to find the specified key # from the fake $HOME/Library/Keychains directory. ENV.delete "RUBY_CODESIGN" - ENV["TMPDIR"] = Path.tmpdir.to_s + ENV["TMPDIR"] = Path.tmpdir.to_s unless Path.ruby_core? require "rubygems/user_interaction" Gem::DefaultUserInteraction.ui = Gem::SilentUI.new From 5911a5231e6ec7bb50e15886ec2f3c5aae4e175e Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 30 Jan 2026 11:01:49 +0900 Subject: [PATCH 76/77] [ruby/rubygems] Handle symlink TMPDIR with macOS https://github.com/ruby/rubygems/commit/cf08f1ec4c --- spec/bundler/bundler/gem_helper_spec.rb | 1 + spec/bundler/support/path.rb | 12 +++++++++++- spec/bundler/support/rubygems_ext.rb | 13 ++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/spec/bundler/bundler/gem_helper_spec.rb b/spec/bundler/bundler/gem_helper_spec.rb index 94f66537d3ebf6..83c2dd237adcec 100644 --- a/spec/bundler/bundler/gem_helper_spec.rb +++ b/spec/bundler/bundler/gem_helper_spec.rb @@ -386,6 +386,7 @@ def sha512_hexdigest(path) credentials = double("credentials", "file?" => true) allow(Bundler.user_home).to receive(:join). with(".gem/credentials").and_return(credentials) + allow(Bundler.user_home).to receive(:join).and_call_original end describe "success messaging" do diff --git a/spec/bundler/support/path.rb b/spec/bundler/support/path.rb index 1c49def88df292..679f54152b29d7 100644 --- a/spec/bundler/support/path.rb +++ b/spec/bundler/support/path.rb @@ -114,7 +114,17 @@ def tmp(*path) end def tmp_root - ruby_core? && (tmpdir = ENV["TMPDIR"]) ? 
Pathname(tmpdir) : source_root.join("tmp") + if ruby_core? && (tmpdir = ENV["TMPDIR"]) + # Use realpath to resolve any symlinks in TMPDIR (e.g., on macOS /var -> /private/var) + real = begin + File.realpath(tmpdir) + rescue Errno::ENOENT, Errno::EACCES + tmpdir + end + Pathname(real) + else + source_root.join("tmp") + end end # Bump this version whenever you make a breaking change to the spec setup diff --git a/spec/bundler/support/rubygems_ext.rb b/spec/bundler/support/rubygems_ext.rb index f168c7048701b9..cf639a660a04fd 100644 --- a/spec/bundler/support/rubygems_ext.rb +++ b/spec/bundler/support/rubygems_ext.rb @@ -43,7 +43,18 @@ def test_setup # sign extension bundles on macOS, to avoid trying to find the specified key # from the fake $HOME/Library/Keychains directory. ENV.delete "RUBY_CODESIGN" - ENV["TMPDIR"] = Path.tmpdir.to_s unless Path.ruby_core? + if Path.ruby_core? + if (tmpdir = ENV["TMPDIR"]) + tmpdir_real = begin + File.realpath(tmpdir) + rescue Errno::ENOENT, Errno::EACCES + tmpdir + end + ENV["TMPDIR"] = tmpdir_real if tmpdir_real != tmpdir + end + else + ENV["TMPDIR"] = Path.tmpdir.to_s + end require "rubygems/user_interaction" Gem::DefaultUserInteraction.ui = Gem::SilentUI.new From eceb099103d0f598634b5d53f8920791216cdb40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Mon, 15 Dec 2025 19:46:29 +0100 Subject: [PATCH 77/77] Show chain of references in Ractor errors When an object fails to be made shareable with `Ractor.make_shareable` or when an unshareable object is accessed through module constants or module instance variables, the error message now includes the chain of references that leads to the unshareable value. 
--- bootstraptest/test_ractor.rb | 37 +++++++++ include/ruby/ractor.h | 4 +- ractor.c | 141 ++++++++++++++++++++++++++++------- ractor_core.h | 20 ++++- test/ruby/test_ractor.rb | 88 +++++++++++++++++++++- variable.c | 19 +++-- vm.c | 34 ++++++--- vm_core.h | 2 + vm_insnhelper.c | 6 +- 9 files changed, 301 insertions(+), 50 deletions(-) diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index e2a3e8dd5beff1..ea8eb6535f40c9 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -986,6 +986,43 @@ def self.fstr = @fstr a + b + c + d + e + f } +assert_equal <<-output.chomp, %q{ + from Hash default proc + from instance variable @ivar of an instance of Foo + from block's self (an instance of Foo) + from Hash default proc + from instance variable @ivar of an instance of Foo + from member :foo of an instance of Bar +output + class Foo + def initialize + @ivar = Hash.new { |h, k| h[k] = [] } # the default proc holds self, an instance of Foo + end + def inspect = "#" + end + + Bar = Data.define(:foo) + + begin + Ractor.make_shareable(Bar.new(Foo.new)) + rescue Ractor::Error + $!.to_s.lines[1..].join + end +} + +assert_equal '[true, true]', %q{ + class Foo + undef_method :freeze + end + + begin + Ractor.make_shareable Foo.new + rescue Ractor::Error + cause = $!.cause + [cause.class == NoMethodError, cause.name == :freeze] + end +} + assert_equal '["instance-variable", "instance-variable", nil]', %q{ class C @iv1 = "" diff --git a/include/ruby/ractor.h b/include/ruby/ractor.h index 8cfca2162107c8..5d71e1001693fe 100644 --- a/include/ruby/ractor.h +++ b/include/ruby/ractor.h @@ -248,7 +248,7 @@ RBIMPL_SYMBOL_EXPORT_END() static inline bool rb_ractor_shareable_p(VALUE obj) { - bool rb_ractor_shareable_p_continue(VALUE obj); + bool rb_ractor_shareable_p_continue(VALUE obj, VALUE *chain); if (RB_SPECIAL_CONST_P(obj)) { return true; @@ -257,7 +257,7 @@ rb_ractor_shareable_p(VALUE obj) return true; } else { - return 
rb_ractor_shareable_p_continue(obj); + return rb_ractor_shareable_p_continue(obj, NULL); } } diff --git a/ractor.c b/ractor.c index 2dcbbd10a054bc..c90e5e16df1897 100644 --- a/ractor.c +++ b/ractor.c @@ -1211,7 +1211,8 @@ enum obj_traverse_iterator_result { traverse_stop, }; -typedef enum obj_traverse_iterator_result (*rb_obj_traverse_enter_func)(VALUE obj); +struct obj_traverse_data; +typedef enum obj_traverse_iterator_result (*rb_obj_traverse_enter_func)(VALUE obj, struct obj_traverse_data *data); typedef enum obj_traverse_iterator_result (*rb_obj_traverse_leave_func)(VALUE obj); typedef enum obj_traverse_iterator_result (*rb_obj_traverse_final_func)(VALUE obj); @@ -1222,13 +1223,15 @@ struct obj_traverse_data { rb_obj_traverse_leave_func leave_func; st_table *rec; - VALUE rec_hash; + VALUE rec_hash; // objects seen during traversal + VALUE *chain; // reference chain string built during unwinding (NULL if not needed) + VALUE *exception; // exception raised trying to freeze an object }; - struct obj_traverse_callback_data { bool stop; struct obj_traverse_data *data; + VALUE obj; }; static int obj_traverse_i(VALUE obj, struct obj_traverse_data *data); @@ -1239,11 +1242,13 @@ obj_hash_traverse_i(VALUE key, VALUE val, VALUE ptr) struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr; if (obj_traverse_i(key, d->data)) { + rb_ractor_error_chain_append(d->data->chain, "\n from Hash key %+"PRIsVALUE, key); d->stop = true; return ST_STOP; } if (obj_traverse_i(val, d->data)) { + rb_ractor_error_chain_append(d->data->chain, "\n from Hash value at key %+"PRIsVALUE, key); d->stop = true; return ST_STOP; } @@ -1277,6 +1282,9 @@ obj_traverse_ivar_foreach_i(ID key, VALUE val, st_data_t ptr) struct obj_traverse_callback_data *d = (struct obj_traverse_callback_data *)ptr; if (obj_traverse_i(val, d->data)) { + rb_ractor_error_chain_append(d->data->chain, + "\n from instance variable %"PRIsVALUE" of an instance of %"PRIsVALUE, + rb_id2str(key), 
rb_class_real(CLASS_OF(d->obj))); d->stop = true; return ST_STOP; } @@ -1289,7 +1297,7 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) { if (RB_SPECIAL_CONST_P(obj)) return 0; - switch (data->enter_func(obj)) { + switch (data->enter_func(obj, data)) { case traverse_cont: break; case traverse_skip: return 0; // skip children case traverse_stop: return 1; // stop search @@ -1304,9 +1312,12 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) struct obj_traverse_callback_data d = { .stop = false, .data = data, + .obj = obj, }; rb_ivar_foreach(obj, obj_traverse_ivar_foreach_i, (st_data_t)&d); - if (d.stop) return 1; + if (d.stop) { + return 1; + } switch (BUILTIN_TYPE(obj)) { // no child node @@ -1328,14 +1339,26 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) for (int i = 0; i < RARRAY_LENINT(obj); i++) { VALUE e = rb_ary_entry(obj, i); - if (obj_traverse_i(e, data)) return 1; + if (obj_traverse_i(e, data)) { + rb_ractor_error_chain_append(data->chain, "\n from Array element at index %d", i); + return 1; + } } } break; case T_HASH: { - if (obj_traverse_i(RHASH_IFNONE(obj), data)) return 1; + const VALUE ifnone = RHASH_IFNONE(obj); + if (obj_traverse_i(ifnone, data)) { + if (RB_FL_TEST_RAW(obj, RHASH_PROC_DEFAULT)) { + rb_ractor_error_chain_append(data->chain, "\n from Hash default proc"); + } + else { + rb_ractor_error_chain_append(data->chain, "\n from Hash default value"); + } + return 1; + } struct obj_traverse_callback_data d = { .stop = false, @@ -1352,7 +1375,14 @@ obj_traverse_i(VALUE obj, struct obj_traverse_data *data) const VALUE *ptr = RSTRUCT_CONST_PTR(obj); for (long i=0; ichain, + "\n from member %+"PRIsVALUE" of an instance of %"PRIsVALUE, + member_name, rb_class_real(CLASS_OF(obj))); + return 1; + } } } break; @@ -1423,15 +1453,21 @@ static int rb_obj_traverse(VALUE obj, rb_obj_traverse_enter_func enter_func, rb_obj_traverse_leave_func leave_func, - rb_obj_traverse_final_func final_func) + rb_obj_traverse_final_func 
final_func, + VALUE *chain, + VALUE *exception) { struct obj_traverse_data data = { .enter_func = enter_func, .leave_func = leave_func, .rec = NULL, + .chain = chain, + .exception = exception, }; - if (obj_traverse_i(obj, &data)) return 1; + if (obj_traverse_i(obj, &data)) { + return 1; + } if (final_func && data.rec) { struct rb_obj_traverse_final_data f = {final_func, 0}; st_foreach(data.rec, obj_traverse_final_i, (st_data_t)&f); @@ -1456,14 +1492,45 @@ allow_frozen_shareable_p(VALUE obj) return false; } +static VALUE +try_freeze(VALUE obj) +{ + rb_funcall(obj, idFreeze, 0); + return Qtrue; +} + +struct rescue_freeze_data { + VALUE exception; +}; + +static VALUE +rescue_freeze(VALUE data, VALUE freeze_exception) +{ + struct rescue_freeze_data *rescue_freeze_data = (struct rescue_freeze_data *)data; + VALUE exception = rb_exc_new3(rb_eRactorError, rb_str_new_cstr("raised calling #freeze")); + rb_ivar_set(exception, rb_intern("cause"), freeze_exception); + rescue_freeze_data->exception = exception; + return Qfalse; +} + static enum obj_traverse_iterator_result -make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_result result) +make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_result result, struct obj_traverse_data *data) { if (!RB_OBJ_FROZEN_RAW(obj)) { - rb_funcall(obj, idFreeze, 0); + struct rescue_freeze_data rescue_freeze_data = { 0 }; + if (!rb_rescue(try_freeze, obj, rescue_freeze, (VALUE)&rescue_freeze_data)) { + if (data->exception) { + *data->exception = rescue_freeze_data.exception; + } + return traverse_stop; + } if (UNLIKELY(!RB_OBJ_FROZEN_RAW(obj))) { - rb_raise(rb_eRactorError, "#freeze does not freeze object correctly"); + VALUE exception = rb_exc_new3(rb_eRactorError, rb_str_new_cstr("#freeze does not freeze object correctly")); + if (data->exception) { + *data->exception = exception; + } + return traverse_stop; } if (RB_OBJ_SHAREABLE_P(obj)) { @@ -1477,7 +1544,7 @@ 
make_shareable_check_shareable_freeze(VALUE obj, enum obj_traverse_iterator_resu static int obj_refer_only_shareables_p(VALUE obj); static enum obj_traverse_iterator_result -make_shareable_check_shareable(VALUE obj) +make_shareable_check_shareable(VALUE obj, struct obj_traverse_data *data) { VM_ASSERT(!SPECIAL_CONST_P(obj)); @@ -1490,7 +1557,8 @@ make_shareable_check_shareable(VALUE obj) if (type->flags & RUBY_TYPED_FROZEN_SHAREABLE_NO_REC) { if (obj_refer_only_shareables_p(obj)) { - make_shareable_check_shareable_freeze(obj, traverse_skip); + enum obj_traverse_iterator_result result = make_shareable_check_shareable_freeze(obj, traverse_skip, data); + if (result == traverse_stop) return traverse_stop; RB_OBJ_SET_SHAREABLE(obj); return traverse_skip; } @@ -1500,11 +1568,19 @@ make_shareable_check_shareable(VALUE obj) } } else if (rb_obj_is_proc(obj)) { - rb_proc_ractor_make_shareable(obj, Qundef); + if (!rb_proc_ractor_make_shareable_continue(obj, Qundef, data->chain)) { + rb_proc_t *proc = (rb_proc_t *)RTYPEDDATA_DATA(obj); + if (proc->block.type != block_type_iseq) rb_raise(rb_eRuntimeError, "not supported yet"); + + if (data->exception) { + *data->exception = rb_exc_new3(rb_eRactorIsolationError, rb_sprintf("Proc's self is not shareable: %" PRIsVALUE, obj)); + } + return traverse_stop; + } return traverse_cont; } else { - rb_raise(rb_eRactorError, "can not make shareable object for %+"PRIsVALUE, obj); + return traverse_stop; } } @@ -1529,7 +1605,7 @@ make_shareable_check_shareable(VALUE obj) break; } - return make_shareable_check_shareable_freeze(obj, traverse_cont); + return make_shareable_check_shareable_freeze(obj, traverse_cont, data); } static enum obj_traverse_iterator_result @@ -1546,9 +1622,20 @@ mark_shareable(VALUE obj) VALUE rb_ractor_make_shareable(VALUE obj) { - rb_obj_traverse(obj, - make_shareable_check_shareable, - null_leave, mark_shareable); + VALUE chain = Qnil; + VALUE exception = Qfalse; + if (rb_obj_traverse(obj, 
make_shareable_check_shareable, null_leave, mark_shareable, &chain, &exception)) { + if (exception) { + VALUE id_mesg = rb_intern("mesg"); + VALUE message = rb_attr_get(exception, id_mesg); + message = rb_sprintf("%"PRIsVALUE"%"PRIsVALUE, message, chain); + rb_ivar_set(exception, id_mesg, message); + rb_exc_raise(exception); + } + rb_raise(rb_eRactorError, "can not make shareable object for %+"PRIsVALUE"%"PRIsVALUE, obj, chain); + } + RB_GC_GUARD(chain); + RB_GC_GUARD(exception); return obj; } @@ -1579,7 +1666,7 @@ rb_ractor_ensure_main_ractor(const char *msg) } static enum obj_traverse_iterator_result -shareable_p_enter(VALUE obj) +shareable_p_enter(VALUE obj, struct obj_traverse_data *data) { if (RB_OBJ_SHAREABLE_P(obj)) { return traverse_skip; @@ -1600,11 +1687,9 @@ shareable_p_enter(VALUE obj) } bool -rb_ractor_shareable_p_continue(VALUE obj) +rb_ractor_shareable_p_continue(VALUE obj, VALUE *chain) { - if (rb_obj_traverse(obj, - shareable_p_enter, null_leave, - mark_shareable)) { + if (rb_obj_traverse(obj, shareable_p_enter, null_leave, mark_shareable, chain, NULL)) { return false; } else { @@ -1620,7 +1705,7 @@ rb_ractor_setup_belonging(VALUE obj) } static enum obj_traverse_iterator_result -reset_belonging_enter(VALUE obj) +reset_belonging_enter(VALUE obj, struct obj_traverse_data *data) { if (rb_ractor_shareable_p(obj)) { return traverse_skip; @@ -1642,7 +1727,7 @@ static VALUE ractor_reset_belonging(VALUE obj) { #if RACTOR_CHECK_MODE > 0 - rb_obj_traverse(obj, reset_belonging_enter, null_leave, NULL); + rb_obj_traverse(obj, reset_belonging_enter, null_leave, NULL, NULL, NULL); #endif return obj; } diff --git a/ractor_core.h b/ractor_core.h index d112ff87244944..63ab853501adb8 100644 --- a/ractor_core.h +++ b/ractor_core.h @@ -149,7 +149,7 @@ st_table *rb_ractor_targeted_hooks(rb_ractor_t *cr); RUBY_SYMBOL_EXPORT_BEGIN void rb_ractor_finish_marking(void); -bool rb_ractor_shareable_p_continue(VALUE obj); +bool rb_ractor_shareable_p_continue(VALUE obj, VALUE 
*chain); // THIS FUNCTION SHOULD NOT CALL WHILE INCREMENTAL MARKING!! // This function is for T_DATA::free_func @@ -270,6 +270,24 @@ rb_ractor_targeted_hooks_cnt(rb_ractor_t *cr) return cr->pub.targeted_hooks_cnt; } +static inline void +rb_ractor_error_chain_append(VALUE *chain_ptr, const char *fmt, ...) +{ + if (!chain_ptr) return; + + va_list args; + va_start(args, fmt); + + if (NIL_P(*chain_ptr)) { + *chain_ptr = rb_vsprintf(fmt, args); + } + else { + rb_str_vcatf(*chain_ptr, fmt, args); + } + + va_end(args); +} + #if RACTOR_CHECK_MODE > 0 # define RACTOR_BELONGING_ID(obj) (*(uint32_t *)(((uintptr_t)(obj)) + rb_gc_obj_slot_size(obj))) diff --git a/test/ruby/test_ractor.rb b/test/ruby/test_ractor.rb index 6ae511217aca09..a60fb58c5237a1 100644 --- a/test/ruby/test_ractor.rb +++ b/test/ruby/test_ractor.rb @@ -213,6 +213,91 @@ def test_ifunc_proc_not_shareable assert_unshareable(pr, /not supported yet/, exception: RuntimeError) end + def test_error_includes_ivar + obj = Class.new do + def initialize + @unshareable = -> {} + end + end.new + assert_unshareable(obj, /from instance variable @unshareable of an instance of # {}], /from Array element at index 1/) + end + + def test_error_includes_hash_key_and_value + assert_unshareable({ unshareable: -> {} }, /from Hash value at key :unshareable/) + end + + def test_error_includes_hash_unshareable_key + assert_unshareable({ -> {} => true }, /from Hash key # {}) + assert_unshareable(s, /from member :member of an instance of TestRactor::S/) + end + + def test_error_includes_block_self + pr = -> {} + assert_unshareable(pr, /from block's self \(an instance of #{self.class.name}\)/) + end + + def test_error_wraps_freeze_error + obj = Class.new do + undef_method :freeze + end.new + e = assert_unshareable(obj, /raised calling #freeze/, exception: Ractor::Error) + assert_equal NoMethodError, e.cause.class + assert_equal :freeze, e.cause.name + end + + def test_error_for_module_instance_variable + assert_ractor(<<~'RUBY') + h = 
Hash.new {}.freeze + mod = Module.new do + attr_reader :unshareable + @unshareable = h + end + mod.extend(mod) + e = Ractor.new(mod) do |mod| + mod.unshareable + rescue + $! + end.value + assert_kind_of Ractor::IsolationError, e + assert_match(/from Hash default proc/, e.message) + RUBY + end + + def test_error_for_module_constant + assert_ractor(<<~'RUBY') + module ModuleWithUnshareableConstant + UNSHAREABLE = Hash.new {}.freeze + end + + e = Ractor.new do + ModuleWithUnshareableConstant::UNSHAREABLE + rescue + $! + end.value + assert_kind_of(Ractor::IsolationError, e) + assert_match(/from Hash default proc/, e.message) + RUBY + end + def assert_make_shareable(obj) refute Ractor.shareable?(obj), "object was already shareable" Ractor.make_shareable(obj) @@ -221,9 +306,10 @@ def assert_make_shareable(obj) def assert_unshareable(obj, msg=nil, exception: Ractor::IsolationError) refute Ractor.shareable?(obj), "object is already shareable" - assert_raise_with_message(exception, msg) do + e = assert_raise_with_message(exception, msg) do Ractor.make_shareable(obj) end refute Ractor.shareable?(obj), "despite raising, object became shareable" + e end end diff --git a/variable.c b/variable.c index ff8d24d78aef6c..ecca8b89fd25bd 100644 --- a/variable.c +++ b/variable.c @@ -1444,11 +1444,13 @@ rb_ivar_lookup(VALUE obj, ID id, VALUE undef) VALUE val = rb_ivar_lookup(RCLASS_WRITABLE_FIELDS_OBJ(obj), id, undef); if (val != undef && rb_is_instance_id(id) && - UNLIKELY(!rb_ractor_main_p()) && - !rb_ractor_shareable_p(val)) { - rb_raise(rb_eRactorIsolationError, - "can not get unshareable values from instance variables of classes/modules from non-main Ractors (%"PRIsVALUE" from %"PRIsVALUE")", - rb_id2str(id), obj); + UNLIKELY(!rb_ractor_main_p())) { + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(val, &chain)) { + rb_raise(rb_eRactorIsolationError, + "can not get unshareable values from instance variables of classes/modules from non-main Ractors (%"PRIsVALUE" from 
%"PRIsVALUE")%"PRIsVALUE, + rb_id2str(id), obj, chain); + } } return val; } @@ -3356,8 +3358,11 @@ rb_const_get_0(VALUE klass, ID id, int exclude, int recurse, int visibility) VALUE c = rb_const_search(klass, id, exclude, recurse, visibility, &found_in); if (!UNDEF_P(c)) { if (UNLIKELY(!rb_ractor_main_p())) { - if (!rb_ractor_shareable_p(c)) { - rb_raise(rb_eRactorIsolationError, "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.", rb_class_path(found_in), rb_id2str(id)); + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(c, &chain)) { + rb_raise(rb_eRactorIsolationError, + "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.%"PRIsVALUE, + rb_class_path(found_in), rb_id2str(id), chain); } } return c; diff --git a/vm.c b/vm.c index 2cae6779d9cbc1..eb9b719fcc27b5 100644 --- a/vm.c +++ b/vm.c @@ -1568,6 +1568,18 @@ rb_proc_isolate(VALUE self) VALUE rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) +{ + VALUE chain = Qnil; + if (!rb_proc_ractor_make_shareable_continue(self, replace_self, &chain)) { + rb_raise(rb_eRactorIsolationError, + "Proc's self is not shareable: %" PRIsVALUE "%"PRIsVALUE, + self, chain); + } + return self; +} + +bool +rb_proc_ractor_make_shareable_continue(VALUE self, VALUE replace_self, VALUE *chain) { const rb_iseq_t *iseq = vm_proc_iseq(self); @@ -1580,10 +1592,14 @@ rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) if (proc->block.type != block_type_iseq) rb_raise(rb_eRuntimeError, "not supported yet"); - if (!rb_ractor_shareable_p(vm_block_self(&proc->block))) { - rb_raise(rb_eRactorIsolationError, - "Proc's self is not shareable: %" PRIsVALUE, - self); + VALUE block_self = vm_block_self(&proc->block); + if (!RB_SPECIAL_CONST_P(block_self) && + !RB_OBJ_SHAREABLE_P(block_self)) { + if (!rb_ractor_shareable_p_continue(block_self, chain)) { + rb_ractor_error_chain_append(chain, "\n from block's self (an instance of 
%"PRIsVALUE")", + rb_class_real(CLASS_OF(block_self))); + return false; + } } VALUE read_only_variables = Qfalse; @@ -1601,15 +1617,15 @@ rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) if (block->type != block_type_symbol) rb_raise(rb_eRuntimeError, "not supported yet"); VALUE proc_self = vm_block_self(block); - if (!rb_ractor_shareable_p(proc_self)) { - rb_raise(rb_eRactorIsolationError, - "Proc's self is not shareable: %" PRIsVALUE, - self); + if (!rb_ractor_shareable_p_continue(proc_self, chain)) { + rb_ractor_error_chain_append(chain, "\n from proc's self (an instance of %"PRIsVALUE")", + rb_class_real(CLASS_OF(proc_self))); + return false; } } RB_OBJ_SET_FROZEN_SHAREABLE(self); - return self; + return true; } VALUE diff --git a/vm_core.h b/vm_core.h index 68adc5eac16f32..31f0bc0e6078ca 100644 --- a/vm_core.h +++ b/vm_core.h @@ -1297,6 +1297,8 @@ VALUE rb_proc_isolate_bang(VALUE self, VALUE replace_self); VALUE rb_proc_ractor_make_shareable(VALUE proc, VALUE replace_self); RUBY_SYMBOL_EXPORT_END +bool rb_proc_ractor_make_shareable_continue(VALUE self, VALUE replace_self, VALUE *chain); + typedef struct { VALUE flags; /* imemo header */ rb_iseq_t *iseq; diff --git a/vm_insnhelper.c b/vm_insnhelper.c index a27bf5f49be69c..d772ad05ec53af 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1143,9 +1143,11 @@ vm_get_ev_const(rb_execution_context_t *ec, VALUE orig_klass, ID id, bool allow_ } else { if (UNLIKELY(!rb_ractor_main_p())) { - if (!rb_ractor_shareable_p(val)) { + VALUE chain = Qnil; + if (!rb_ractor_shareable_p_continue(val, &chain)) { rb_raise(rb_eRactorIsolationError, - "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main ractor.", rb_class_path(klass), rb_id2str(id)); + "can not access non-shareable objects in constant %"PRIsVALUE"::%"PRIsVALUE" by non-main Ractor.%"PRIsVALUE, + rb_class_path(klass), rb_id2str(id), chain); } } return val;