From 128ab52be9d72fc9feb22032b48cd168e2753b83 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 3 Feb 2026 12:10:01 +0100 Subject: [PATCH] Better guard against syntax invalid code in ripper lex translator Closes #3899 Also better compatibility by only dropping the last token if it is actually EOF --- lib/prism/lex_compat.rb | 9 +++++---- test/prism/ruby/ripper_test.rb | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 4c516a9de0..5b685716cc 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -758,8 +758,9 @@ def result end end - # Drop the EOF token from the list - tokens = tokens[0...-1] + # Drop the EOF token from the list. The EOF token may not be + # present if the source was syntax invalid + tokens = tokens[0...-1] if tokens.dig(-1, 1) == :on_eof # We sort by location because Ripper.lex sorts. tokens.sort_by! do |token| @@ -804,7 +805,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token) next_whitespace_index += 1 first_whitespace = sp_value[0...continuation_index] continuation = sp_value[continuation_index...next_whitespace_index] - second_whitespace = sp_value[next_whitespace_index..] + second_whitespace = sp_value[next_whitespace_index..] || "" new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] @@ -819,7 +820,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token) prev_token_end = start_offset + token[2].bytesize end - unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl + if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl end_offset = eof_token.location.end_offset if prev_token_end < end_offset new_tokens << [ diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 52a5ad7ef4..758505ac2a 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -140,6 +140,17 @@ def test_lexer assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end + + # On syntax invalid code the output doesn't always match up + # In these cases we just want to make sure that it doesn't raise. + def test_lex_invalid_syntax + assert_nothing_raised do + Translation::Ripper.lex('scan/\p{alpha}/') + end + + assert_equal(Ripper.lex('if;)'), Translation::Ripper.lex('if;)')) + end + def test_tokenize source = "foo;1;BAZ" assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))