Skip to content

Deprecate Prism::Translation::Parser #3526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/prism/translation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module Prism
# syntax trees.
module Translation # steep:ignore
autoload :Parser, "prism/translation/parser"
autoload :ParserBase, "prism/translation/parser_base"
autoload :ParserCurrent, "prism/translation/parser_current"
autoload :Parser33, "prism/translation/parser33"
autoload :Parser34, "prism/translation/parser34"
Expand Down
364 changes: 15 additions & 349 deletions lib/prism/translation/parser.rb
Original file line number Diff line number Diff line change
@@ -1,366 +1,32 @@
# frozen_string_literal: true

# Load the whitequark/parser gem that the translation layer below subclasses.
# `gem` is called with the version requirement before `require` so that the
# requirement in required_version is applied to the gem being activated.
begin
required_version = ">= 3.3.7.2"
gem "parser", required_version
require "parser"
rescue LoadError
# Loading failed: print guidance through Kernel#warn and terminate the
# process with exit status 1 rather than re-raising.
warn(<<~MSG)
Error: Unable to load parser #{required_version}. \
Add `gem "parser"` to your Gemfile or run `bundle update parser`.
MSG
exit(1)
end

module Prism
module Translation
# This class is the entry-point for converting a prism syntax tree into the
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
class Parser < ::Parser::Base
Diagnostic = ::Parser::Diagnostic # :nodoc:
private_constant :Diagnostic

# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
class PrismDiagnostic < Diagnostic
# This is the cached message coming from prism.
attr_reader :message

# Initialize a new diagnostic with the given message and location.
#
# The message is stored in @message before delegating to the parent
# initializer, which receives the level, the reason, an empty hash, the
# location and an empty array.
def initialize(message, level, reason, location)
@message = message
super(level, reason, {}, location, [])
end
class Parser < ParserBase # :nodoc:
# Class-level `inherited` hook: emits a deprecation warning (category
# :deprecated, attributed one frame up via `uplevel: 1`) and then defers to
# the parent implementation with `super`.
def self.inherited(subclass)
warn(<<~MSG, uplevel: 1, category: :deprecated)
[deprecation]: Using `Prism::Translation::Parser` is deprecated and will be \
removed in the next major version. Use `Prism::Translation::Parser34` instead.
MSG
super
end

Racc_debug_parser = false # :nodoc:

# The `builder` argument is used to create the parser using our custom builder class by default.
#
# By using the `:parser` keyword argument, you can translate in a way that is compatible with
# the Parser gem using any parser.
#
# For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance
# by reusing a pre-parsed `Prism::ParseLexResult`:
#
# class PrismPreparsed
# def initialize(prism_result)
# @prism_result = prism_result
# end
#
# def parse_lex(source, **options)
# @prism_result
# end
# end
#
# prism_preparsed = PrismPreparsed.new(prism_result)
#
# Prism::Translation::Parser34.new(builder, parser: prism_preparsed)
#
# In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods
# should be implemented as needed.
#
def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
if !builder.is_a?(Prism::Translation::Parser::Builder)
warn(<<~MSG, uplevel: 1, category: :deprecated)
[deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
`Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
MSG
# Filter parser frames, find first user code
offset = caller(1..5)&.find_index do |loc|
!loc.include?("/lib/parser/base.rb")
end
@parser = parser

super(builder)
warn(<<~MSG, uplevel: 1 + (offset || 0), category: :deprecated)
[deprecation]: Using `Prism::Translation::Parser` is deprecated and will be \
removed in the next major version. Use `Prism::Translation::Parser34` instead.
MSG
super
end

# Returns the integer 34. prism_options passes this value through
# convert_for_prism to choose the `version` option handed to prism.
def version # :nodoc:
34
end

# The default encoding for Ruby files is UTF-8.
#
# Returns the Encoding::UTF_8 constant.
def default_encoding
Encoding::UTF_8
end

# Intentionally empty: takes no arguments, does nothing and returns nil.
# NOTE(review): presumably overrides an error callback inherited from
# ::Parser::Base so that it becomes a no-op -- confirm against the parser gem.
def yyerror # :nodoc:
end

# Parse the given source buffer with prism and return the translated AST.
#
# The buffer is held in @source_buffer only while the call is running; the
# ensure clause clears it again even when parsing or translation raises.
def parse(source_buffer)
  @source_buffer = source_buffer

  begin
    code = source_buffer.source
    cache = build_offset_cache(code)
    parsed = @parser.parse(code, **prism_options)

    build_ast(unwrap(parsed, cache).value, cache)
  ensure
    @source_buffer = nil
  end
end

# Parse the given source buffer with prism and return a two-element array:
# the translated AST followed by the translated source code comments.
#
# @source_buffer is set for the duration of the call and reset to nil in the
# ensure clause, whether or not an exception is raised.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer

  begin
    code = source_buffer.source
    cache = build_offset_cache(code)
    outcome = unwrap(@parser.parse(code, **prism_options), cache)

    ast = build_ast(outcome.value, cache)
    comments = build_comments(outcome.comments, cache)
    [ast, comments]
  ensure
    @source_buffer = nil
  end
end

# Parses a source buffer and returns a three-element array: the AST, the
# source code comments, and the tokens emitted by the lexer.
#
# When +recover+ is true, a ::Parser::SyntaxError raised while reporting
# diagnostics is swallowed and the comments and tokens are still returned;
# the AST is only built when the prism result reports success, so it is nil
# otherwise. When +recover+ is false (the default) the error is re-raised.
#
# @source_buffer is set for the duration of the call and cleared in the
# ensure clause.
def tokenize(source_buffer, recover = false)
  @source_buffer = source_buffer
  source = source_buffer.source

  offset_cache = build_offset_cache(source)

  # Hold on to the prism result before reporting diagnostics. It used to be
  # assigned from unwrap's return value, so a recovered syntax error left it
  # nil and the `result.value` call below raised NoMethodError.
  result = @parser.parse_lex(source, **prism_options)

  begin
    unwrap(result, offset_cache)
  rescue ::Parser::SyntaxError
    raise unless recover
  end

  program, tokens = result.value
  ast = build_ast(program, offset_cache) if result.success?

  [
    ast,
    build_comments(result.comments, offset_cache),
    build_tokens(tokens, offset_cache)
  ]
ensure
  @source_buffer = nil
end

# Prism already resolves numbered parameters, so no declaration logic is
# needed here. This only reports whether the node's first child is named
# like a numbered parameter: an underscore followed by one digit from 1 to 9.
def try_declare_numparam(node)
  name = node.children.first
  name.match?(/\A_[1-9]\z/)
end

private

# This is a hook to allow consumers to disable some errors if they don't
# want them to block creating the syntax tree.
#
# unwrap calls this once per prism error and skips the error when the result
# is falsy. This default implementation accepts every error.
def valid_error?(error)
true
end

# This is a hook to allow consumers to disable some warnings if they don't
# want them to block creating the syntax tree.
#
# unwrap calls this once per prism warning and skips the warning when the
# result is falsy. This default implementation accepts every warning.
def valid_warning?(warning)
true
end

# Translate a prism parse error into a diagnostic.
#
# Error types with a known counterpart are mapped onto a Diagnostic with the
# matching reason (and, for some types, arguments taken from the error's
# source slice). A few types adjust the reported location first. Every
# other type falls back to a PrismDiagnostic carrying prism's own message.
def error_diagnostic(error, offset_cache)
  location = error.location
  range = build_range(location, offset_cache)
  arguments = {}

  reason =
    case error.type
    when :argument_block_multi then :block_and_blockarg
    when :argument_formal_constant then :argument_const
    when :argument_formal_class then :argument_cvar
    when :argument_formal_global then :argument_gvar
    when :argument_formal_ivar then :argument_ivar
    when :argument_no_forwarding_amp then :no_anonymous_blockarg
    when :argument_no_forwarding_star then :no_anonymous_restarg
    when :argument_no_forwarding_star_star then :no_anonymous_kwrestarg
    when :begin_lonely_else
      # Report a range of length 4 starting at the error location.
      location = location.copy(length: 4)
      range = build_range(location, offset_cache)
      :useless_else
    when :class_name, :module_name then :module_name_const
    when :class_in_method then :class_in_def
    when :def_endless_setter then :endless_setter
    when :embdoc_term then :embedded_document
    when :incomplete_variable_class, :incomplete_variable_class_3_3
      # Extend the location by one so the slice and range cover one more unit.
      location = location.copy(length: location.length + 1)
      range = build_range(location, offset_cache)
      arguments = { name: location.slice }
      :cvar_name
    when :incomplete_variable_instance, :incomplete_variable_instance_3_3
      location = location.copy(length: location.length + 1)
      range = build_range(location, offset_cache)
      arguments = { name: location.slice }
      :ivar_name
    when :invalid_variable_global, :invalid_variable_global_3_3
      arguments = { name: location.slice }
      :gvar_name
    when :module_in_method then :module_in_def
    when :numbered_parameter_ordinary then :ordinary_param_defined
    when :numbered_parameter_outer_scope then :numparam_used_in_outer_scope
    when :parameter_circular
      arguments = { var_name: location.slice }
      :circular_argument_reference
    when :parameter_name_repeat then :duplicate_argument
    when :parameter_numbered_reserved
      arguments = { name: location.slice }
      :reserved_for_numparam
    when :regexp_unknown_options
      # Drop the first character of the slice before reporting the options.
      arguments = { options: location.slice[1..] }
      :regexp_options
    when :singleton_for_literals then :singleton_literal
    when :string_literal_eof then :string_eof
    when :unexpected_token_ignore
      arguments = { token: location.slice }
      :unexpected_token
    when :write_target_in_method then :dynamic_const
    else
      return PrismDiagnostic.new(error.message, :error, error.type, range)
    end

  Diagnostic.new(:error, reason, arguments, range, [])
end

# Translate a prism parse warning into a diagnostic.
#
# Returns a Diagnostic for warning types with a known counterpart, nil for
# :duplicated_hash_key (parser reports that one on its own), and a
# PrismDiagnostic carrying prism's own message for everything else.
def warning_diagnostic(warning, offset_cache)
  range = build_range(warning.location, offset_cache)
  type = warning.type

  # All "ambiguous prefix" warnings share a reason and differ only in the
  # prefix argument.
  prefix =
    case type
    when :ambiguous_first_argument_plus then "+"
    when :ambiguous_first_argument_minus then "-"
    when :ambiguous_prefix_ampersand then "&"
    when :ambiguous_prefix_star then "*"
    when :ambiguous_prefix_star_star then "**"
    end
  return Diagnostic.new(:warning, :ambiguous_prefix, { prefix: prefix }, range, []) if prefix

  case type
  when :ambiguous_slash
    Diagnostic.new(:warning, :ambiguous_regexp, {}, range, [])
  when :dot_dot_dot_eol
    Diagnostic.new(:warning, :triple_dot_at_eol, {}, range, [])
  when :duplicated_hash_key
    nil
  else
    PrismDiagnostic.new(warning.message, :warning, type, range)
  end
end

# Report every accepted error and warning of a prism result to the
# diagnostics engine, then return the result unchanged.
#
# Errors and warnings rejected by valid_error? / valid_warning? are skipped,
# as are warnings for which warning_diagnostic returns nothing. Any
# exception raised by the diagnostics engine propagates to the caller.
def unwrap(result, offset_cache)
  result.errors.each do |error|
    diagnostics.process(error_diagnostic(error, offset_cache)) if valid_error?(error)
  end

  result.warnings.each do |warning|
    if valid_warning?(warning) && (diagnostic = warning_diagnostic(warning, offset_cache))
      diagnostics.process(diagnostic)
    end
  end

  result
end

# Prism reports offsets in bytes whereas the parser gem works with character
# offsets, so a byte-to-character lookup is needed to build the parser AST.
#
# When the source has as many bytes as characters the two offsets coincide
# and an identity lambda is returned. Otherwise the result is an array that
# maps each byte offset (the index) to its character offset (the value),
# with one trailing entry for the end-of-source offset. Both return values
# respond to `[]`.
def build_offset_cache(source)
  return -> (offset) { offset } if source.bytesize == source.length

  cache = []
  char_offset = 0

  source.each_char do |char|
    # Every byte of a multibyte character maps to the same character offset.
    cache.concat([char_offset] * char.bytesize)
    char_offset += 1
  end

  cache << char_offset
end

# Translate the prism AST into the parser gem AST by having the program node
# accept a Compiler built for this parser and the given offset cache.
def build_ast(program, offset_cache)
  compiler = Compiler.new(self, offset_cache)
  program.accept(compiler)
end

# Convert prism comments into parser gem comments, one
# ::Parser::Source::Comment per prism comment, each wrapping the range built
# from that comment's location.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    range = build_range(comment.location, offset_cache)
    ::Parser::Source::Comment.new(range)
  end
end

# Convert prism tokens into parser gem tokens by running them through a
# Lexer created for the current source buffer and returning its `to_a`.
def build_tokens(tokens, offset_cache)
  lexer = Lexer.new(source_buffer, tokens, offset_cache)
  lexer.to_a
end

# Build a ::Parser::Source::Range over the current source buffer from a prism
# location, translating its start and end offsets through the offset cache.
def build_range(location, offset_cache)
  first = offset_cache[location.start_offset]
  last = offset_cache[location.end_offset]

  ::Parser::Source::Range.new(source_buffer, first, last)
end

# Build the keyword options passed to prism when parsing or lexing the
# current source buffer: its file path, the prism version string derived
# from `version`, and `partial_script: true`.
def prism_options
  buffer = @source_buffer

  options = {
    filepath: buffer.name,
    version: convert_for_prism(version),
    partial_script: true,
  }

  # The parser gem always encodes to UTF-8, unless it is binary.
  # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107
  # So `encoding: false` is added for every source that is not binary.
  options[:encoding] = false unless buffer.source.encoding == Encoding::BINARY

  options
end

# Map a version number in the format handled by Parser (33, 34, 35) onto the
# version string handled by Prism. Any other value maps to "latest".
def convert_for_prism(version)
  case version
  when 33 then "3.3.1"
  when 34 then "3.4.0"
  when 35 then "3.5.0"
  else "latest"
  end
end

require_relative "parser/builder"
require_relative "parser/compiler"
require_relative "parser/lexer"

private_constant :Compiler
private_constant :Lexer
end
end
end
2 changes: 1 addition & 1 deletion lib/prism/translation/parser/builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

module Prism
module Translation
class Parser
class Parser < ParserBase
# A builder that knows how to convert more modern Ruby syntax
# into whitequark/parser gem's syntax tree.
class Builder < ::Parser::Builders::Default
Expand Down
Loading
Loading