diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 6b9bde51ea..fb5ca055b7 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -2,285 +2,49 @@ # :markup: markdown # typed: ignore -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - -require "rbconfig" -require "ffi" - -# We want to eagerly load this file if there are Ractors so that it does not get -# autoloaded from within a non-main Ractor. -require "prism/serialize" if defined?(Ractor) - -module Prism # :nodoc: - module LibRubyParser # :nodoc: - extend FFI::Library - - # Define the library that we will be pulling functions from. Note that this - # must align with the build shared library from make/rake. - libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) - libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" - - if File.exist?(libprism_in_build) - INCLUDE_DIR = File.expand_path("../../include", __dir__) - ffi_lib libprism_in_build - else - INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include" - ffi_lib libprism_in_libdir - end - - # Convert a native C type declaration into a symbol that FFI understands. - # For example: - # - # const char * -> :pointer - # bool -> :bool - # size_t -> :size_t - # void -> :void - # - def self.resolve_type(type, callbacks) - type = type.strip - - if !type.end_with?("*") - type.delete_prefix("const ").to_sym - else - type = type.delete_suffix("*").rstrip - callbacks.include?(type.to_sym) ? type.to_sym : :pointer - end - end - - # Read through the given header file and find the declaration of each of the - # given functions. For each one, define a function with the same name and - # signature as the C function. - def self.load_exported_functions_from(header, *functions, callbacks) - File.foreach("#{INCLUDE_DIR}/#{header}") do |line| - # We only want to attempt to load exported functions. 
- next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") - - # We only want to load the functions that we are interested in. - next unless functions.any? { |function| line.include?(function) } - - # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) - line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") - - # Parse the function declaration. - unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line - raise "Could not parse #{line}" - end - - # Delete the function from the list of functions we are looking for to - # mark it as having been found. - functions.delete(name) - - # Split up the argument types into an array, ensure we handle the case - # where there are no arguments (by explicit void). - arg_types = arg_types.split(",").map(&:strip) - arg_types = [] if arg_types == %w[void] - - # Resolve the type of the argument by dropping the name of the argument - # first if it is present. - arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) } - - # Attach the function using the FFI library. - attach_function name, arg_types, resolve_type(return_type, []) - end - - # If we didn't find all of the functions, raise an error. - raise "Could not find functions #{functions.inspect}" unless functions.empty? - end - - callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer - callback :pm_source_stream_feof_t, [:pointer], :int - pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] - enum :pm_source_init_result_t, pm_source_init_result_values - enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] - - # Ractor-safe lookup table for pm_source_init_result_t, since FFI's - # enum_type accesses module instance variables that are not shareable. 
- SOURCE_INIT_RESULT = pm_source_init_result_values.freeze - - load_exported_functions_from( - "prism/version.h", - "pm_version", - [] - ) - - load_exported_functions_from( - "prism/serialize.h", - "pm_serialize_parse", - "pm_serialize_parse_stream", - "pm_serialize_parse_comments", - "pm_serialize_lex", - "pm_serialize_parse_lex", - "pm_serialize_parse_success_p", - [] - ) - - load_exported_functions_from( - "prism/string_query.h", - "pm_string_query_local", - "pm_string_query_constant", - "pm_string_query_method_name", - [] - ) - - load_exported_functions_from( - "prism/buffer.h", - "pm_buffer_new", - "pm_buffer_value", - "pm_buffer_length", - "pm_buffer_free", - [] - ) - - load_exported_functions_from( - "prism/source.h", - "pm_source_file_new", - "pm_source_mapped_new", - "pm_source_stream_new", - "pm_source_free", - "pm_source_source", - "pm_source_length", - [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] - ) - - # This object represents a pm_buffer_t. We only use it as an opaque pointer, - # so it doesn't need to know the fields of pm_buffer_t. - class PrismBuffer # :nodoc: - attr_reader :pointer - - def initialize(pointer) - @pointer = pointer - end - - def value - LibRubyParser.pm_buffer_value(pointer) - end - - def length - LibRubyParser.pm_buffer_length(pointer) - end - - def read - value.read_string(length) - end - - # Initialize a new buffer and yield it to the block. The buffer will be - # automatically freed when the block returns. - def self.with - buffer = LibRubyParser.pm_buffer_new - raise unless buffer - - begin - yield new(buffer) - ensure - LibRubyParser.pm_buffer_free(buffer) - end - end - end - - # This object represents source code to be parsed. For strings it wraps a - # pointer directly; for files it uses a pm_source_t under the hood. 
- class PrismSource # :nodoc: - PLATFORM_EXPECTS_UTF8 = - RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) - - attr_reader :pointer, :length - - def initialize(pointer, length, from_string) - @pointer = pointer - @length = length - @from_string = from_string - end - - def read - raise "should use the original String instead" if @from_string - @pointer.read_string(@length) - end - - # Yields a PrismSource backed by the given string to the block. - def self.with_string(string) - raise TypeError unless string.is_a?(String) - - length = string.bytesize - # + 1 to never get an address of 0, which pm_parser_init() asserts - FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| - pointer.write_string(string) - # since we have the extra byte we might as well \0-terminate - pointer.put_char(length, 0) - return yield new(pointer, length, true) - end - end - - # Yields a PrismSource to the given block, backed by a pm_source_t. - def self.with_file(filepath) - raise TypeError unless filepath.is_a?(String) - - # On Windows and Mac, it's expected that filepaths will be encoded in - # UTF-8. If they are not, we need to convert them to UTF-8 before - # passing them into pm_source_mapped_new. 
- if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 - filepath = filepath.encode(Encoding::UTF_8) - end - - FFI::MemoryPointer.new(:int) do |result_ptr| - pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) - - case SOURCE_INIT_RESULT[result_ptr.read_int] - when :PM_SOURCE_INIT_SUCCESS - pointer = LibRubyParser.pm_source_source(pm_source) - length = LibRubyParser.pm_source_length(pm_source) - return yield new(pointer, length, false) - when :PM_SOURCE_INIT_ERROR_GENERIC - raise SystemCallError.new(filepath, FFI.errno) - when :PM_SOURCE_INIT_ERROR_DIRECTORY - raise Errno::EISDIR.new(filepath) - when :PM_SOURCE_INIT_ERROR_NON_REGULAR - # Fall back to reading the file through Ruby IO for non-regular - # files (pipes, character devices, etc.) - return with_string(File.read(filepath)) { |string| yield string } - else - raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" - end - ensure - LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? - end - end - end +require_relative "ffi/common" + +begin + require_relative "ffi/native_ffi.rb" +rescue LoadError + if RUBY_ENGINE == "jruby" + require_relative "ffi/wasm_ffi.rb" + else + raise end +end - # Mark the LibRubyParser module as private as it should only be called through - # the prism module. - private_constant :LibRubyParser +module Prism # :nodoc: # The version constant is set by reading the result of calling pm_version. - VERSION = LibRubyParser.pm_version.read_string.freeze + VERSION = FFICommon.version class << self # Mirror the Prism.dump API by using the serialization API. def dump(source, **options) - LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) } + FFICommon.with_string(source) { |string| FFICommon.dump(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. 
def dump_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) } + FFICommon.with_file(filepath) { |string| FFICommon.dump(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.lex(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.lex(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. def parse(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses @@ -288,12 +52,12 @@ def parse(code, **options) # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse(string, string.read, options) } end # Mirror the Prism.parse_stream API by using the serialization API. def parse_stream(stream, **options) - LibRubyParser::PrismBuffer.with do |buffer| + FFICommon.with_buffer do |buffer| source = +"" callback = -> (string, size, _) { raise "Expected size to be >= 0, got: #{size}" if size <= 0 @@ -306,19 +70,13 @@ def parse_stream(stream, **options) eof_callback = -> (_) { stream.eof? 
} - pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) - begin - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) - Prism.load(source, buffer.read, options.fetch(:freeze, false)) - ensure - LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? - end + FFICommon.parse_stream(buffer, callback, eof_callback, options, source) end end # Mirror the Prism.parse_comments API by using the serialization API. def parse_comments(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_comments(string, code, options) } end # Mirror the Prism.parse_file_comments API by using the serialization @@ -326,23 +84,23 @@ def parse_comments(code, **options) # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_comments(string, string.read, options) } end # Mirror the Prism.parse_lex API by using the serialization API. def parse_lex(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_lex(string, code, options) } end # Mirror the Prism.parse_lex_file API by using the serialization API. def parse_lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_lex(string, string.read, options) } end # Mirror the Prism.parse_success? API by using the serialization API. 
def parse_success?(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_file_success(string, options) } end # Mirror the Prism.parse_failure? API by using the serialization API. @@ -353,7 +111,7 @@ def parse_failure?(code, **options) # Mirror the Prism.parse_file_success? API by using the serialization API. def parse_file_success?(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_file_success(string, options) } end # Mirror the Prism.parse_file_failure? API by using the serialization API. @@ -363,9 +121,9 @@ def parse_file_failure?(filepath, **options) # Mirror the Prism.profile API by using the serialization API. def profile(source, **options) - LibRubyParser::PrismSource.with_string(source) do |string| - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + FFICommon.with_string(source) do |string| + FFICommon.with_buffer do |buffer| + FFICommon.parse_only(buffer, string, options) nil end end @@ -373,205 +131,15 @@ def profile(source, **options) # Mirror the Prism.profile_file API by using the serialization API. 
def profile_file(filepath, **options) - LibRubyParser::PrismSource.with_file(filepath) do |string| - LibRubyParser::PrismBuffer.with do |buffer| + FFICommon.with_file(filepath) do |string| + FFICommon.with_buffer do |buffer| options[:filepath] = filepath - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + FFICommon.parse_only(buffer, string, options) nil end end end - private - - def dump_common(string, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - - dumped = buffer.read - dumped.freeze if options.fetch(:freeze, false) - - dumped - end - end - - def lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_common(string, code, options) # :nodoc: - serialized = dump_common(string, options) - Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) - end - - def parse_comments_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) - end - - # Return the value 
that should be dumped for the command_line option. - def dump_options_command_line(options) - command_line = options.fetch(:command_line, "") - raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) - - command_line.each_char.inject(0) do |value, char| - case char - when "a" then value | 0b000001 - when "e" then value | 0b000010 - when "l" then value | 0b000100 - when "n" then value | 0b001000 - when "p" then value | 0b010000 - when "x" then value | 0b100000 - else raise ArgumentError, "invalid command_line option: #{char}" - end - end - end - - # Return the value that should be dumped for the version option. - def dump_options_version(version) - case version - when "current" - version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) - when "latest", nil - 0 # Handled in pm_parser_init - when "nearest" - dump = version_string_to_number(RUBY_VERSION) - return dump if dump - if RUBY_VERSION < "3.3" - version_string_to_number("3.3") - else - 0 # Handled in pm_parser_init - end - else - version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") - end - end - - # Converts a version string like "4.0.0" or "4.0" into a number. - # Returns nil if the version is unknown. - def version_string_to_number(version) - case version - when /\A3\.3(\.\d+)?\z/ - 1 - when /\A3\.4(\.\d+)?\z/ - 2 - when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ - 3 - when /\A4\.1(\.\d+)?\z/ - 4 - end - end - - # Convert the given options into a serialized options string. - def dump_options(options) - template = +"" - values = [] - - template << "L" - if (filepath = options[:filepath]) - values.push(filepath.bytesize, filepath.b) - template << "A*" - else - values << 0 - end - - template << "l" - values << options.fetch(:line, 1) - - template << "L" - if (encoding = options[:encoding]) - name = encoding.is_a?(Encoding) ? 
encoding.name : encoding - values.push(name.bytesize, name.b) - template << "A*" - else - values << 0 - end - - template << "C" - values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) - - template << "C" - values << dump_options_command_line(options) - - template << "C" - values << dump_options_version(options[:version]) - - template << "C" - values << (options[:encoding] == false ? 1 : 0) - - template << "C" - values << (options.fetch(:main_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:partial_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:freeze, false) ? 1 : 0) - - template << "L" - if (scopes = options[:scopes]) - values << scopes.length - - scopes.each do |scope| - locals = nil - forwarding = 0 - - case scope - when Array - locals = scope - when Scope - locals = scope.locals - - scope.forwarding.each do |forward| - case forward - when :* then forwarding |= 0x1 - when :** then forwarding |= 0x2 - when :& then forwarding |= 0x4 - when :"..." then forwarding |= 0x8 - else raise ArgumentError, "invalid forwarding value: #{forward}" - end - end - else - raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" - end - - template << "L" - values << locals.length - - template << "C" - values << forwarding - - locals.each do |local| - name = local.name - template << "L" - values << name.bytesize - - template << "A*" - values << name.b - end - end - else - values << 0 - end - - values.pack(template) - end end # Here we are going to patch StringQuery to put in the class-level methods so @@ -580,17 +148,17 @@ class StringQuery # :nodoc: class << self # Mirrors the C extension's StringQuery::local? method. def local?(string) - query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_local(string)) end # Mirrors the C extension's StringQuery::constant? method. 
def constant?(string) - query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_constant(string)) end # Mirrors the C extension's StringQuery::method_name? method. def method_name?(string) - query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_method_name(string)) end private diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb new file mode 100644 index 0000000000..2600273318 --- /dev/null +++ b/lib/prism/ffi/common.rb @@ -0,0 +1,229 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +module Prism + + class Common + def dump(string, options) # :nodoc: + with_buffer do |buffer| + parse_only(buffer, string, options) + + dumped = buffer.read + dumped.freeze if options.fetch(:freeze, false) + + dumped + end + end + + def parse(string, code, options) # :nodoc: + serialized = dump(string, options) + Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) + end + + def lex(string, code, options) # :nodoc: + with_buffer do |buffer| + lex_only(buffer, string, options) + Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + # Return the value that should be dumped for the command_line option. + def dump_options_command_line(options) + command_line = options.fetch(:command_line, "") + raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) + + command_line.each_char.inject(0) do |value, char| + case char + when "a" then value | 0b000001 + when "e" then value | 0b000010 + when "l" then value | 0b000100 + when "n" then value | 0b001000 + when "p" then value | 0b010000 + when "x" then value | 0b100000 + else raise ArgumentError, "invalid command_line option: #{char}" + end + end + end + + # Return the value that should be dumped for the version option. 
+ def dump_options_version(version) + case version + when "current" + version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) + when "latest", nil + 0 # Handled in pm_parser_init + when "nearest" + dump = version_string_to_number(RUBY_VERSION) + return dump if dump + if RUBY_VERSION < "3.3" + version_string_to_number("3.3") + else + 0 # Handled in pm_parser_init + end + else + version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") + end + end + + # Converts a version string like "4.0.0" or "4.0" into a number. + # Returns nil if the version is unknown. + def version_string_to_number(version) + case version + when /\A3\.3(\.\d+)?\z/ + 1 + when /\A3\.4(\.\d+)?\z/ + 2 + when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ + 3 + when /\A4\.1(\.\d+)?\z/ + 4 + end + end + + # Convert the given options into a serialized options string. + def dump_options(options) + template = +"" + values = [] + + template << "L" + if (filepath = options[:filepath]) + values.push(filepath.bytesize, filepath.b) + template << "A*" + else + values << 0 + end + + template << "l" + values << options.fetch(:line, 1) + + template << "L" + if (encoding = options[:encoding]) + name = encoding.is_a?(Encoding) ? encoding.name : encoding + values.push(name.bytesize, name.b) + template << "A*" + else + values << 0 + end + + template << "C" + values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) + + template << "C" + values << dump_options_command_line(options) + + template << "C" + values << dump_options_version(options[:version]) + + template << "C" + values << (options[:encoding] == false ? 1 : 0) + + template << "C" + values << (options.fetch(:main_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:partial_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:freeze, false) ? 
1 : 0) + + template << "L" + if (scopes = options[:scopes]) + values << scopes.length + + scopes.each do |scope| + locals = nil + forwarding = 0 + + case scope + when Array + locals = scope + when Scope + locals = scope.locals + + scope.forwarding.each do |forward| + case forward + when :* then forwarding |= 0x1 + when :** then forwarding |= 0x2 + when :& then forwarding |= 0x4 + when :"..." then forwarding |= 0x8 + else raise ArgumentError, "invalid forwarding value: #{forward}" + end + end + else + raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" + end + + template << "L" + values << locals.length + + template << "C" + values << forwarding + + locals.each do |local| + name = local.name + template << "L" + values << name.bytesize + + template << "A*" + values << name.b + end + end + else + values << 0 + end + + values.pack(template) + end + + # Required APIs below + + def with_buffer(&b) + raise NotImplementedError + end + + def with_string(string, &b) + raise NotImplementedError + end + + def with_file(string, &b) + raise NotImplementedError + end + + def lex_only(buffer, string, options) + raise NotImplementedError + end + + def parse_only(buffer, string, options) + raise NotImplementedError + end + + def parse_stream(buffer, callback, eof_callback, options, source) + raise NotImplementedError + end + + def parse_comments(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_lex(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_file_success(string, options) # :nodoc: + raise NotImplementedError + end + + def string_query_method_name(string) + raise NotImplementedError + end + + def string_query_constant(string) + raise NotImplementedError + end + + def string_query_local(string) + raise NotImplementedError + end + end +end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb new file mode 100644 index 0000000000..600e4236ec --- /dev/null +++ 
b/lib/prism/ffi/native_ffi.rb @@ -0,0 +1,325 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. +require "prism/serialize" if defined?(Ractor) + +module Prism # :nodoc: + module LibRubyParser # :nodoc: + extend FFI::Library + + # Define the library that we will be pulling functions from. Note that this + # must align with the build shared library from make/rake. + libprism_in_build = File.expand_path("../../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" + + if File.exist?(libprism_in_build) + INCLUDE_DIR = File.expand_path("../../../include", __dir__) + ffi_lib libprism_in_build + else + INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include" + ffi_lib libprism_in_libdir + end + + # Convert a native C type declaration into a symbol that FFI understands. + # For example: + # + # const char * -> :pointer + # bool -> :bool + # size_t -> :size_t + # void -> :void + # + def self.resolve_type(type, callbacks) + type = type.strip + + if !type.end_with?("*") + type.delete_prefix("const ").to_sym + else + type = type.delete_suffix("*").rstrip + callbacks.include?(type.to_sym) ? type.to_sym : :pointer + end + end + + # Read through the given header file and find the declaration of each of the + # given functions. For each one, define a function with the same name and + # signature as the C function. + def self.load_exported_functions_from(header, *functions, callbacks) + File.foreach("#{INCLUDE_DIR}/#{header}") do |line| + # We only want to attempt to load exported functions. 
+ next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") + + # We only want to load the functions that we are interested in. + next unless functions.any? { |function| line.include?(function) } + + # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) + line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") + + # Parse the function declaration. + unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line + raise "Could not parse #{line}" + end + + # Delete the function from the list of functions we are looking for to + # mark it as having been found. + functions.delete(name) + + # Split up the argument types into an array, ensure we handle the case + # where there are no arguments (by explicit void). + arg_types = arg_types.split(",").map(&:strip) + arg_types = [] if arg_types == %w[void] + + # Resolve the type of the argument by dropping the name of the argument + # first if it is present. + arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) } + + # Attach the function using the FFI library. + attach_function name, arg_types, resolve_type(return_type, []) + end + + # If we didn't find all of the functions, raise an error. + raise "Could not find functions #{functions.inspect}" unless functions.empty? + end + + callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_feof_t, [:pointer], :int + pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + enum :pm_source_init_result_t, pm_source_init_result_values + enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] + + # Ractor-safe lookup table for pm_source_init_result_t, since FFI's + # enum_type accesses module instance variables that are not shareable. 
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze + + load_exported_functions_from( + "prism/version.h", + "pm_version", + [] + ) + + load_exported_functions_from( + "prism/serialize.h", + "pm_serialize_parse", + "pm_serialize_parse_stream", + "pm_serialize_parse_comments", + "pm_serialize_lex", + "pm_serialize_parse_lex", + "pm_serialize_parse_success_p", + [] + ) + + load_exported_functions_from( + "prism/string_query.h", + "pm_string_query_local", + "pm_string_query_constant", + "pm_string_query_method_name", + [] + ) + + load_exported_functions_from( + "prism/buffer.h", + "pm_buffer_new", + "pm_buffer_value", + "pm_buffer_length", + "pm_buffer_free", + [] + ) + + load_exported_functions_from( + "prism/source.h", + "pm_source_file_new", + "pm_source_mapped_new", + "pm_source_stream_new", + "pm_source_free", + "pm_source_source", + "pm_source_length", + [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] + ) + + # This object represents a pm_buffer_t. We only use it as an opaque pointer, + # so it doesn't need to know the fields of pm_buffer_t. + class NativeBuffer # :nodoc: + attr_reader :pointer + + def initialize(pointer) + @pointer = pointer + end + + def value + LibRubyParser.pm_buffer_value(pointer) + end + + def length + LibRubyParser.pm_buffer_length(pointer) + end + + def read + value.read_string(length) + end + + # Initialize a new buffer and yield it to the block. The buffer will be + # automatically freed when the block returns. + def self.with + buffer = LibRubyParser.pm_buffer_new + raise unless buffer + + begin + yield new(buffer) + ensure + LibRubyParser.pm_buffer_free(buffer) + end + end + end + + # This object represents source code to be parsed. For strings it wraps a + # pointer directly; for files it uses a pm_source_t under the hood. 
+ class NativeSource # :nodoc: + PLATFORM_EXPECTS_UTF8 = + RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) + + attr_reader :pointer, :length + + def initialize(pointer, length, from_string) + @pointer = pointer + @length = length + @from_string = from_string + end + + def read + raise "should use the original String instead" if @from_string + @pointer.read_string(@length) + end + + # Yields a NativeSource backed by the given string to the block. + def self.with_string(string) + raise TypeError unless string.is_a?(String) + + length = string.bytesize + # + 1 to never get an address of 0, which pm_parser_init() asserts + FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| + pointer.write_string(string) + # since we have the extra byte we might as well \0-terminate + pointer.put_char(length, 0) + return yield new(pointer, length, true) + end + end + + # Yields a NativeSource to the given block, backed by a pm_source_t. + def self.with_file(filepath) + raise TypeError unless filepath.is_a?(String) + + # On Windows and Mac, it's expected that filepaths will be encoded in + # UTF-8. If they are not, we need to convert them to UTF-8 before + # passing them into pm_source_mapped_new. 
+ if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 + filepath = filepath.encode(Encoding::UTF_8) + end + + FFI::MemoryPointer.new(:int) do |result_ptr| + pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) + + case SOURCE_INIT_RESULT[result_ptr.read_int] + when :PM_SOURCE_INIT_SUCCESS + pointer = LibRubyParser.pm_source_source(pm_source) + length = LibRubyParser.pm_source_length(pm_source) + return yield new(pointer, length, false) + when :PM_SOURCE_INIT_ERROR_GENERIC + raise SystemCallError.new(filepath, FFI.errno) + when :PM_SOURCE_INIT_ERROR_DIRECTORY + raise Errno::EISDIR.new(filepath) + when :PM_SOURCE_INIT_ERROR_NON_REGULAR + # Fall back to reading the file through Ruby IO for non-regular + # files (pipes, character devices, etc.) + return with_string(File.read(filepath)) { |string| yield string } + else + raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" + end + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end + end + end + end + + # Mark the LibRubyParser module as private as it should only be called through + # the prism module. + private_constant :LibRubyParser + + class NativeCommon < Common + + # The version constant is set by reading the result of calling pm_version. 
+ def version + LibRubyParser.pm_version.read_string.freeze + end + + def with_buffer(&b) + LibRubyParser::NativeBuffer.with(&b) + end + + def with_string(string, &b) + LibRubyParser::NativeSource.with_string(string, &b) + end + + def with_file(string, &b) + LibRubyParser::NativeSource.with_file(string, &b) + end + + def lex_only(buffer, string, options) + LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + end + + def parse_only(buffer, string, options) + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + end + + def parse_stream(buffer, callback, eof_callback, options, source) + pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) + begin + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? 
+ end + end + + def parse_comments(string, code, options) # :nodoc: + with_buffer do |buffer| + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_lex(string, code, options) # :nodoc: + with_buffer do |buffer| + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_file_success(string, options) # :nodoc: + LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) + end + + def string_query_method_name(string) + LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name) + end + + def string_query_constant(string) + LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name) + end + + def string_query_local(string) + LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name) + end + end + + FFICommon = NativeCommon.new + private_constant(:FFICommon) +end diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb new file mode 100644 index 0000000000..37ad421474 --- /dev/null +++ b/lib/prism/ffi/wasm_ffi.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. 
require "prism/serialize" if defined?(Ractor)

# Load the prism-parser-wasm jar
require 'jar-dependencies'
require_jar('org.ruby-lang', 'prism-parser-wasm', '0.0.1-SNAPSHOT')
require_jar('com.dylibso.chicory', 'runtime', '1.6.1')
require_jar('com.dylibso.chicory', 'wasi', '1.6.1')
require_jar('com.dylibso.chicory', 'wasm', '1.6.1')
require_jar('com.dylibso.chicory', 'log', '1.6.1')

module Prism # :nodoc:
  # Backend for the Common hooks that is backed by the Java/WASM build of
  # prism. Only #version is implemented so far; every other hook raises
  # NotImplementedError.
  class WASMCommon < Common
    java_import org.ruby_lang.prism.wasm.Prism

    # Single parser instance shared by every caller.
    # TODO: concurrency
    PRISM = org.ruby_lang.prism.wasm.Prism.new

    # The version constant is set by reading the result of calling pm_version.
    def version
      # PRISM is defined on this class; no WASM namespace is defined in this
      # file, so the constant is referenced directly.
      PRISM.version
    end

    # Prototype WASM code
    # NOTE(review): the constant references below were updated to PRISM, and
    # the *_file helpers now delegate to dump/lex (as written they called
    # themselves). Confirm before enabling.
    # def dump(source, **options)
    #   parsed = PRISM.parse(source.to_java_bytes, dump_options(options).to_java_bytes)
    # end
    #
    # # Mirror the Prism.dump_file API by using the serialization API.
    # def dump_file(filepath, **options)
    #   dump(File.read(filepath), filepath: filepath, **options)
    # end
    #
    # # Mirror the Prism.lex API by using the serialization API.
    # def lex(source, **options)
    #   lexed = PRISM.lex(source.to_java_bytes, dump_options(options).to_java_bytes)
    #   Serialize.load_lex(source, lexed, options.fetch(:freeze, false))
    # end
    #
    # # Mirror the Prism.lex_file API by using the serialization API.
    # def lex_file(filepath, **options)
    #   lex(File.read(filepath), filepath: filepath, **options)
    # end

    # The hooks below are placeholders: each one raises NotImplementedError
    # until the WASM backend supports it.

    def with_buffer(&b)
      raise NotImplementedError
    end

    def with_string(string, &b)
      raise NotImplementedError
    end

    def with_file(string, &b)
      raise NotImplementedError
    end

    def lex_only(buffer, string, options)
      raise NotImplementedError
    end

    def parse_only(buffer, string, options)
      raise NotImplementedError
    end

    def parse_stream(buffer, callback, eof_callback, options, source)
      raise NotImplementedError
    end

    def parse_comments(string, code, options) # :nodoc:
      raise NotImplementedError
    end

    def parse_lex(string, code, options) # :nodoc:
      raise NotImplementedError
    end

    def parse_file_success(string, options) # :nodoc:
      raise NotImplementedError
    end

    def string_query_method_name(string)
      raise NotImplementedError
    end

    def string_query_constant(string)
      raise NotImplementedError
    end

    def string_query_local(string)
      raise NotImplementedError
    end
  end
end