From cbe91a3f36fb3f103592a8f427a64a66c8630d98 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 19 Mar 2026 12:14:34 -0500 Subject: [PATCH 1/4] Switch identifiers to byte[] * JAVA_STRING_TYPE and related change to IDENTIFIER * TruffleRuby still uses java.lang.String Fixes #4009 --- templates/java/org/ruby_lang/prism/Loader.java.erb | 8 ++++---- templates/java/org/ruby_lang/prism/Nodes.java.erb | 6 +++--- templates/template.rb | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index 534d8401ca..bf1bf7a625 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -1,4 +1,4 @@ -<%- string_type = Prism::Template::JAVA_STRING_TYPE -%> +<%- string_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> package org.ruby_lang.prism; import java.lang.Short; @@ -31,7 +31,7 @@ public class Loader { <%- if string_type == "String" -%> return new String(bytes, encodingCharset).intern(); <%- else -%> - return null; // Must be implemented by subclassing Loader + return bytes; // Must be implemented by subclassing Loader <%- end -%> } @@ -229,7 +229,7 @@ public class Loader { private <%= string_type %>[] loadConstants() { int length = loadVarUInt(); if (length == 0) { - return Nodes.EMPTY_STRING_ARRAY; + return Nodes.EMPTY_IDENTIFIER_ARRAY; } <%= string_type %>[] constants = new <%= string_type %>[length]; for (int i = 0; i < length; i++) { @@ -395,7 +395,7 @@ public class Loader { int bufferPosition = buffer.position(); int serializedLength = buffer.getInt(); // Load everything except the body and locals, because the name, receiver, parameters are still needed for lazily defining the method - Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, 
Nodes.EMPTY_STRING_ARRAY); + Nodes.DefNode lazyDefNode = new Nodes.DefNode(<%= base_params.join(", ") -%>, -bufferPosition, this, loadConstant(), loadOptionalNode(), (Nodes.ParametersNode) loadOptionalNode(), null, Nodes.EMPTY_IDENTIFIER_ARRAY); buffer.position(bufferPosition + serializedLength); // skip past the serialized DefNode return lazyDefNode; } diff --git a/templates/java/org/ruby_lang/prism/Nodes.java.erb b/templates/java/org/ruby_lang/prism/Nodes.java.erb index de597eea67..f43df2623e 100644 --- a/templates/java/org/ruby_lang/prism/Nodes.java.erb +++ b/templates/java/org/ruby_lang/prism/Nodes.java.erb @@ -1,4 +1,4 @@ -<%- string_type = Prism::Template::JAVA_STRING_TYPE -%> +<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> package org.ruby_lang.prism; import java.lang.Override; @@ -16,7 +16,7 @@ import java.util.Arrays; // @formatter:off public abstract class Nodes { - public static final <%= string_type %>[] EMPTY_STRING_ARRAY = {}; + public static final <%= id_type %>[] EMPTY_IDENTIFIER_ARRAY = {}; @Target(ElementType.FIELD) @Retention(RetentionPolicy.SOURCE) @@ -383,7 +383,7 @@ public abstract class Nodes { builder.append('\n'); <%- when Prism::Template::ConstantListField -%> builder.append('\n'); - for (<%= string_type %> constant : this.<%= field.name %>) { + for (<%= id_type %> constant : this.<%= field.name %>) { builder.append(nextNextIndent).append('"').append(constant).append('"').append('\n'); } <%- when Prism::Template::Flags -%> diff --git a/templates/template.rb b/templates/template.rb index 8f7734dd43..be39c2f682 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -12,7 +12,7 @@ module Template # :nodoc: all CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby" - JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String" + JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? 
"String" : "byte[]" INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby" COMMON_FLAGS_COUNT = 2 @@ -272,7 +272,7 @@ def call_seq_type end def java_type - JAVA_STRING_TYPE + JAVA_IDENTIFIER_TYPE end end @@ -292,7 +292,7 @@ def call_seq_type end def java_type - JAVA_STRING_TYPE + JAVA_IDENTIFIER_TYPE end end @@ -312,7 +312,7 @@ def call_seq_type end def java_type - "#{JAVA_STRING_TYPE}[]" + "#{JAVA_IDENTIFIER_TYPE}[]" end end From 6ad180a00d51b8b9fb3896cb98da1740db5c37c0 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 20 Mar 2026 09:59:53 -0500 Subject: [PATCH 2/4] Tweaks for byte[] identifiers * Allocate array of byte[] as byte[length][]. * Default JAVA_BACKEND to "default" with "truffleruby" the custom option. --- .../java/org/ruby_lang/prism/Loader.java.erb | 34 ++++++++++++------- templates/template.rb | 2 +- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index bf1bf7a625..ae8007bf90 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -1,4 +1,4 @@ -<%- string_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> +<%- id_type = Prism::Template::JAVA_IDENTIFIER_TYPE -%> package org.ruby_lang.prism; import java.lang.Short; @@ -27,8 +27,8 @@ public class Loader { return Charset.forName(encodingName); } - public <%= string_type %> bytesToName(byte[] bytes) { - <%- if string_type == "String" -%> + public <%= id_type %> bytesToName(byte[] bytes) { + <%- if id_type == "String" -%> return new String(bytes, encodingCharset).intern(); <%- else -%> return bytes; // Must be implemented by subclassing Loader @@ -39,17 +39,21 @@ public class Loader { private final Loader loader; private final int bufferOffset; - private final <%= string_type %>[] cache; + private final <%= id_type %>[] cache; ConstantPool(Loader loader, int bufferOffset, int length) { 
this.loader = loader; this.bufferOffset = bufferOffset; - cache = new <%= string_type %>[length]; + <%- if id_type == "String" -%> + cache = new <%= id_type %>[length]; + <%- else -%> + cache = new byte[length][]; + <%- end -%> } - <%= string_type %> get(ByteBuffer buffer, int oneBasedIndex) { + <%= id_type %> get(ByteBuffer buffer, int oneBasedIndex) { int index = oneBasedIndex - 1; - <%= string_type %> constant = cache[index]; + <%= id_type %> constant = cache[index]; if (constant == null) { int offset = bufferOffset + index * 8; @@ -70,7 +74,7 @@ public class Loader { private final ByteBuffer buffer; protected String encodingName; - <%- if string_type == "String" -%> + <%- if id_type == "String" -%> private Charset encodingCharset; <%- end -%> private ConstantPool constantPool; @@ -100,7 +104,7 @@ public class Loader { byte[] encodingNameBytes = new byte[encodingLength]; buffer.get(encodingNameBytes); this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII); - <%- if string_type == "String" -%> + <%- if id_type == "String" -%> this.encodingCharset = getEncodingCharset(this.encodingName); <%- end -%> @@ -213,11 +217,11 @@ public class Loader { } } - private <%= string_type %> loadConstant() { + private <%= id_type %> loadConstant() { return constantPool.get(buffer, loadVarUInt()); } - private <%= string_type %> loadOptionalConstant() { + private <%= id_type %> loadOptionalConstant() { if (buffer.get(buffer.position()) != 0) { return loadConstant(); } else { @@ -226,12 +230,16 @@ public class Loader { } } - private <%= string_type %>[] loadConstants() { + private <%= id_type %>[] loadConstants() { int length = loadVarUInt(); if (length == 0) { return Nodes.EMPTY_IDENTIFIER_ARRAY; } - <%= string_type %>[] constants = new <%= string_type %>[length]; + <%- if id_type == "String" -%> + <%= id_type %>[] constants = new <%= id_type %>[length]; + <%- else -%> + <%= id_type %>[] constants = new byte[length][]; + <%- end -%> for (int i = 0; i < length; 
i++) { constants[i] = constantPool.get(buffer, loadVarUInt()); } diff --git a/templates/template.rb b/templates/template.rb index be39c2f682..5d1afc9506 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -11,7 +11,7 @@ module Template # :nodoc: all REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) - JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby" + JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default" JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]" INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby" From 5de563794a607fd02ff762be73aaf839e6e17bfb Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 20 Mar 2026 10:49:52 -0500 Subject: [PATCH 3/4] Remove no-longer-relevant comment Co-authored-by: Benoit Daloze --- templates/java/org/ruby_lang/prism/Loader.java.erb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index ae8007bf90..5695e5a651 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -31,7 +31,7 @@ public class Loader { <%- if id_type == "String" -%> return new String(bytes, encodingCharset).intern(); <%- else -%> - return bytes; // Must be implemented by subclassing Loader + return bytes; <%- end -%> } From ccafd2a7d633ce4eea6e69027a6db83c65b3f092 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 20 Mar 2026 15:52:22 -0500 Subject: [PATCH 4/4] Eliminate encodingCharset and bytesToName With identifiers moving to byte[] there's no need to have bytesToName in the logic. It remains as an abstract method in the String version for TruffleRuby to implement their way. 
--- .../java/org/ruby_lang/prism/Loader.java.erb | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/templates/java/org/ruby_lang/prism/Loader.java.erb b/templates/java/org/ruby_lang/prism/Loader.java.erb index 5695e5a651..3e44cccc14 100644 --- a/templates/java/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/org/ruby_lang/prism/Loader.java.erb @@ -19,21 +19,9 @@ public class Loader { // Overridable methods - public Charset getEncodingCharset(String encodingName) { - encodingName = encodingName.toLowerCase(Locale.ROOT); - if (encodingName.equals("ascii-8bit")) { - return StandardCharsets.US_ASCII; - } - return Charset.forName(encodingName); - } - - public <%= id_type %> bytesToName(byte[] bytes) { - <%- if id_type == "String" -%> - return new String(bytes, encodingCharset).intern(); - <%- else -%> - return bytes; - <%- end -%> - } + <%- if id_type == "String" -%> + public abstract <%= id_type %> bytesToName(byte[] bytes); + <%- end -%> private static final class ConstantPool { @@ -63,7 +51,11 @@ public class Loader { byte[] bytes = new byte[length]; buffer.get(start, bytes); + <%- if id_type == "byte[]" -%> + constant = bytes; + <%- else %> constant = loader.bytesToName(bytes); + <%- end %> cache[index] = constant; } @@ -74,9 +66,6 @@ public class Loader { private final ByteBuffer buffer; protected String encodingName; - <%- if id_type == "String" -%> - private Charset encodingCharset; - <%- end -%> private ConstantPool constantPool; private Nodes.Source source = null; @@ -104,9 +93,6 @@ public class Loader { byte[] encodingNameBytes = new byte[encodingLength]; buffer.get(encodingNameBytes); this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII); - <%- if id_type == "String" -%> - this.encodingCharset = getEncodingCharset(this.encodingName); - <%- end -%> source.setStartLine(loadVarSInt()); source.setLineOffsets(loadLineOffsets());