Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions ext/prism/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,16 +802,14 @@ parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *o

pm_node_t *node = pm_parse(parser);

// Here we need to update the Source object to have the correct
// encoding for the source string and the correct newline offsets.
// We do it here because we've already created the Source object and given
// it over to all of the tokens, and both of these are only set after pm_parse().
/* Update the Source object with the correct encoding and line offsets,
* which are only available after pm_parse() completes. */
rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
rb_enc_associate(source_string, encoding);

const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
for (size_t index = 0; index < line_offsets->size; index++) {
rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index]));
rb_ary_store(offsets, (long) index, ULONG2NUM(line_offsets->offsets[index]));
}

if (pm_options_freeze(options)) {
Expand Down
12 changes: 12 additions & 0 deletions include/prism/internal/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,18 @@ struct pm_parser_t {
*/
uint32_t node_id;

/*
 * A single-entry cache for pm_parser_constant_id_raw. Avoids redundant
 * constant pool lookups when the same token is resolved multiple times
 * (e.g., once during lexing for local variable detection, and again
 * during parsing for node creation).
 *
 * The cache is keyed on pointer identity, not on the bytes themselves: a
 * lookup only hits when both the start and end pointers are equal to the
 * ones recorded by the previous lookup. Every miss overwrites the entry.
 */
struct {
/* Pointer to the first byte of the most recently resolved range. */
const uint8_t *start;
/* Pointer to the end of the most recently resolved range. */
const uint8_t *end;
/* The constant id that was returned for the [start, end) range. */
pm_constant_id_t id;
} constant_cache;

/* The current state of the lexer. */
pm_lex_state_t lex_state;

Expand Down
24 changes: 17 additions & 7 deletions lib/prism/parse_result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,26 @@ def self.for(source, start_line, offsets)
# The line number where this source starts.
attr_reader :start_line #: Integer

# The list of newline byte offsets in the source code. When initialized from
# the C extension, this may be a packed binary string of uint32_t values
# that is lazily unpacked on first access.
#--
#: () -> Array[Integer]
def offsets
  offsets = @offsets
  return offsets if offsets.is_a?(Array)

  # Still in packed form: decode it as native-endian unsigned 32-bit
  # integers, one per line offset.
  unpacked = offsets.unpack("L*")

  # Memoizing writes an instance variable, which raises FrozenError on a
  # frozen receiver. NOTE(review): sources produced with the freeze option
  # are presumably frozen -- confirm against the C extension. In that case
  # hand back a frozen array without caching it.
  return unpacked.freeze if frozen?

  @offsets = unpacked
end

# Create a new source object with the given source code. The offsets
# parameter can be either an Array of Integer byte offsets or a packed
# binary string of uint32_t values (from the C extension).
#--
#: (String source, Integer start_line, Array[Integer] | String offsets) -> void
def initialize(source, start_line, offsets)
  @source = source
  @start_line = start_line
  # Stored exactly as given; a packed String is only decoded when the
  # offsets are first read.
  @offsets = offsets
end

# Replace the value of start_line with the given value.
Expand All @@ -81,7 +91,7 @@ def replace_start_line(start_line)
#--
#: (Array[Integer] offsets) -> void
def replace_offsets(offsets)
  # Rebind instead of mutating in place: the current value may be a packed
  # String (on which #replace rejects an Array) or a frozen Array, and
  # neither can take the new list via #replace.
  @offsets = offsets
end

# Returns the encoding of the source code, which is set by parameters to the
Expand Down
12 changes: 8 additions & 4 deletions rbi/generated/prism/parse_result.rbi

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 11 additions & 5 deletions sig/generated/prism/parse_result.rbs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 31 additions & 1 deletion src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,19 @@ pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t
*/
static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
    /* Fast path: if this is the same token as the last lookup (same pointer
     * range), return the cached result. This check has to come before any
     * other return, otherwise the cache is never consulted. */
    if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
        return parser->constant_cache.id;
    }

    /* Slow path: resolve the range through the constant pool. */
    pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));

    /* Remember this range so that an immediately repeated lookup of the same
     * token can skip the pool. */
    parser->constant_cache.start = start;
    parser->constant_cache.end = end;
    parser->constant_cache.id = id;

    return id;
}

/**
Expand Down Expand Up @@ -12589,6 +12601,14 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
}

/**
 * Returns true if the current token matches at least one of the six given
 * types. The types are tested in argument order and testing stops at the
 * first match.
 */
static PRISM_INLINE bool
match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
    const pm_token_type_t candidates[] = { type1, type2, type3, type4, type5, type6 };
    const size_t count = sizeof(candidates) / sizeof(candidates[0]);

    for (size_t position = 0; position < count; position++) {
        if (match1(parser, candidates[position])) {
            return true;
        }
    }

    return false;
}

/**
* Returns true if the current token is any of the seven given types.
*/
Expand Down Expand Up @@ -15091,6 +15111,16 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
*/
static bool
parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
/* Fast path: if the current token can't begin an expression and isn't
* a parenthesis, block opener, or splat/block-pass operator, there are
* no arguments to parse. */
if (
!token_begins_expression_p(parser->current.type) &&
!match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
) {
return false;
}

bool found = false;
bool parsed_command_args = false;

Expand Down
20 changes: 9 additions & 11 deletions templates/ext/prism/api_node.c.erb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
#include "prism/extension.h"
#include "prism/internal/allocator.h"
#include "prism/internal/arena.h"

#include <assert.h>

Expand Down Expand Up @@ -81,11 +82,7 @@ pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
VALUE source_string = rb_enc_str_new((const char *) start, pm_parser_end(parser) - start, encoding);

const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
VALUE offsets = rb_ary_new_capa(line_offsets->size);

for (size_t index = 0; index < line_offsets->size; index++) {
rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index]));
}
VALUE offsets = rb_str_new((const char *) line_offsets->offsets, line_offsets->size * sizeof(uint32_t));

if (freeze) {
rb_obj_freeze(source_string);
Expand All @@ -105,8 +102,8 @@ typedef struct pm_node_stack_node {
} pm_node_stack_node_t;

static void
pm_node_stack_push(pm_node_stack_node_t **stack, const pm_node_t *visit) {
pm_node_stack_node_t *node = xmalloc(sizeof(pm_node_stack_node_t));
pm_node_stack_push(pm_arena_t *arena, pm_node_stack_node_t **stack, const pm_node_t *visit) {
pm_node_stack_node_t *node = (pm_node_stack_node_t *) pm_arena_alloc(arena, sizeof(pm_node_stack_node_t), PRISM_ALIGNOF(pm_node_stack_node_t));
node->prev = *stack;
node->visit = visit;
node->visited = false;
Expand All @@ -119,7 +116,6 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) {
const pm_node_t *visit = current->visit;

*stack = current->prev;
xfree_sized(current, sizeof(pm_node_stack_node_t));

return visit;
}
Expand Down Expand Up @@ -151,8 +147,9 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
pm_ast_constants_each_data_t constants_data = { .constants = constants, .encoding = encoding };
pm_parser_constants_each(parser, pm_ast_constants_each, &constants_data);

pm_arena_t *node_arena = pm_arena_new();
pm_node_stack_node_t *node_stack = NULL;
pm_node_stack_push(&node_stack, node);
pm_node_stack_push(node_arena, &node_stack, node);
VALUE value_stack = rb_ary_new();

while (node_stack != NULL) {
Expand All @@ -175,10 +172,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>);
<%- when Prism::Template::NodeListField -%>
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
}
<%- end -%>
<%- end -%>
Expand Down Expand Up @@ -280,6 +277,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
}
}

pm_arena_free(node_arena);
return rb_ary_pop(value_stack);
}

Expand Down