From 128ab52be9d72fc9feb22032b48cd168e2753b83 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Tue, 3 Feb 2026 12:10:01 +0100
Subject: [PATCH] Better guard against syntax invalid code in ripper lex
 translator

Closes #3899

Also improves compatibility by only dropping
the last token if it is actually an EOF token
---
 lib/prism/lex_compat.rb        |  9 +++++----
 test/prism/ruby/ripper_test.rb | 11 +++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 4c516a9de0..5b685716cc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -758,8 +758,9 @@ def result
         end
       end
 
-      # Drop the EOF token from the list
-      tokens = tokens[0...-1]
+      # Drop the EOF token from the list. The EOF token may not be
+      # present if the source was syntactically invalid.
+      tokens = tokens[0...-1] if tokens.dig(-1, 1) == :on_eof
 
       # We sort by location because Ripper.lex sorts.
       tokens.sort_by! do |token|
@@ -804,7 +805,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
             next_whitespace_index += 1
             first_whitespace = sp_value[0...continuation_index]
             continuation = sp_value[continuation_index...next_whitespace_index]
-            second_whitespace = sp_value[next_whitespace_index..]
+            second_whitespace = sp_value[next_whitespace_index..] || ""
 
             new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
             new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
@@ -819,7 +820,7 @@ def insert_on_sp(tokens, source, data_loc, bom, eof_token)
         prev_token_end = start_offset + token[2].bytesize
       end
 
-      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+      if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
         end_offset = eof_token.location.end_offset
         if prev_token_end < end_offset
           new_tokens << [
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 52a5ad7ef4..758505ac2a 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -140,6 +140,17 @@ def test_lexer
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
+
+    # On syntactically invalid code the output doesn't always match Ripper's.
+    # In these cases we just want to make sure that it doesn't raise.
+    def test_lex_invalid_syntax
+      assert_nothing_raised do
+        Translation::Ripper.lex('scan/\p{alpha}/')
+      end
+
+      assert_equal(Ripper.lex('if;)'), Translation::Ripper.lex('if;)'))
+    end
+
     def test_tokenize
       source = "foo;1;BAZ"
       assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))