ruby · Earlopain · Mar 27, 2025 · Mar 28, 2025
diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb
@@ -5,6 +5,7 @@ module Prism
   # syntax trees.
   module Translation # steep:ignore
     autoload :Parser, "prism/translation/parser"
+    autoload :ParserBase, "prism/translation/parser_base"
     autoload :ParserCurrent, "prism/translation/parser_current"
     autoload :Parser33, "prism/translation/parser33"
     autoload :Parser34, "prism/translation/parser34"

diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
@@ -1,366 +1,32 @@
 # frozen_string_literal: true
 
-begin
-  required_version = ">= 3.3.7.2"
-  gem "parser", required_version
-  require "parser"
-rescue LoadError
-  warn(<<~MSG)
-    Error: Unable to load parser #{required_version}. \
-    Add `gem "parser"` to your Gemfile or run `bundle update parser`.
-  MSG
-  exit(1)
-end
-
 module Prism
   module Translation
-    # This class is the entry-point for converting a prism syntax tree into the
-    # whitequark/parser gem's syntax tree. It inherits from the base parser for
-    # the parser gem, and overrides the parse* methods to parse with prism and
-    # then translate.
-    class Parser < ::Parser::Base
-      Diagnostic = ::Parser::Diagnostic # :nodoc:
-      private_constant :Diagnostic
-
-      # The parser gem has a list of diagnostics with a hard-coded set of error
-      # messages. We create our own diagnostic class in order to set our own
-      # error messages.
-      class PrismDiagnostic < Diagnostic
-        # This is the cached message coming from prism.
-        attr_reader :message
-
-        # Initialize a new diagnostic with the given message and location.
-        def initialize(message, level, reason, location)
-          @message = message
-          super(level, reason, {}, location, [])
-        end
+    class Parser < ParserBase # :nodoc:
+      def self.inherited(subclass)
+        warn(<<~MSG, uplevel: 1, category: :deprecated)
+          [deprecation]: Using `Prism::Translation::Parser` is deprecated and will be \
+          removed in the next major version. Use `Prism::Translation::Parser34` instead.
+        MSG
+        super
       end
 
-      Racc_debug_parser = false # :nodoc:
-
-      # The `builder` argument is used to create the parser using our custom builder class by default.
-      #
-      # By using the `:parser` keyword argument, you can translate in a way that is compatible with
-      # the Parser gem using any parser.
-      #
-      # For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance
-      # by reusing a pre-parsed `Prism::ParseLexResult`:
-      #
-      #   class PrismPreparsed
-      #     def initialize(prism_result)
-      #       @prism_result = prism_result
-      #     end
-      #
-      #     def parse_lex(source, **options)
-      #       @prism_result
-      #     end
-      #   end
-      #
-      #   prism_preparsed = PrismPreparsed.new(prism_result)
-      #
-      #   Prism::Translation::Ruby34.new(builder, parser: prism_preparsed)
-      #
-      # In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods
-      # should be implemented as needed.
-      #
       def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
-        if !builder.is_a?(Prism::Translation::Parser::Builder)
-          warn(<<~MSG, uplevel: 1, category: :deprecated)
-            [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
-            `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
-          MSG
+        # Filter parser frames, find first user code
+        offset = caller(1..5)&.find_index do |loc|
+          !loc.include?("/lib/parser/base.rb")
         end
-        @parser = parser
 
-        super(builder)
+        warn(<<~MSG, uplevel: 1 + (offset || 0), category: :deprecated)
+          [deprecation]: Using `Prism::Translation::Parser` is deprecated and will be \
+          removed in the next major version. Use `Prism::Translation::Parser34` instead.
+        MSG
+        super
       end
 
       def version # :nodoc:
         34
       end
-
-      # The default encoding for Ruby files is UTF-8.
-      def default_encoding
-        Encoding::UTF_8
-      end
-
-      def yyerror # :nodoc:
-      end
-
-      # Parses a source buffer and returns the AST.
-      def parse(source_buffer)
-        @source_buffer = source_buffer
-        source = source_buffer.source
-
-        offset_cache = build_offset_cache(source)
-        result = unwrap(@parser.parse(source, **prism_options), offset_cache)
-
-        build_ast(result.value, offset_cache)
-      ensure
-        @source_buffer = nil
-      end
-
-      # Parses a source buffer and returns the AST and the source code comments.
-      def parse_with_comments(source_buffer)
-        @source_buffer = source_buffer
-        source = source_buffer.source
-
-        offset_cache = build_offset_cache(source)
-        result = unwrap(@parser.parse(source, **prism_options), offset_cache)
-
-        [
-          build_ast(result.value, offset_cache),
-          build_comments(result.comments, offset_cache)
-        ]
-      ensure
-        @source_buffer = nil
-      end
-
-      # Parses a source buffer and returns the AST, the source code comments,
-      # and the tokens emitted by the lexer.
-      def tokenize(source_buffer, recover = false)
-        @source_buffer = source_buffer
-        source = source_buffer.source
-
-        offset_cache = build_offset_cache(source)
-        result =
-          begin
-            unwrap(@parser.parse_lex(source, **prism_options), offset_cache)
-          rescue ::Parser::SyntaxError
-            raise if !recover
-          end
-
-        program, tokens = result.value
-        ast = build_ast(program, offset_cache) if result.success?
-
-        [
-          ast,
-          build_comments(result.comments, offset_cache),
-          build_tokens(tokens, offset_cache)
-        ]
-      ensure
-        @source_buffer = nil
-      end
-
-      # Since prism resolves num params for us, we don't need to support this
-      # kind of logic here.
-      def try_declare_numparam(node)
-        node.children[0].match?(/\A_[1-9]\z/)
-      end
-
-      private
-
-      # This is a hook to allow consumers to disable some errors if they don't
-      # want them to block creating the syntax tree.
-      def valid_error?(error)
-        true
-      end
-
-      # This is a hook to allow consumers to disable some warnings if they don't
-      # want them to block creating the syntax tree.
-      def valid_warning?(warning)
-        true
-      end
-
-      # Build a diagnostic from the given prism parse error.
-      def error_diagnostic(error, offset_cache)
-        location = error.location
-        diagnostic_location = build_range(location, offset_cache)
-
-        case error.type
-        when :argument_block_multi
-          Diagnostic.new(:error, :block_and_blockarg, {}, diagnostic_location, [])
-        when :argument_formal_constant
-          Diagnostic.new(:error, :argument_const, {}, diagnostic_location, [])
-        when :argument_formal_class
-          Diagnostic.new(:error, :argument_cvar, {}, diagnostic_location, [])
-        when :argument_formal_global
-          Diagnostic.new(:error, :argument_gvar, {}, diagnostic_location, [])
-        when :argument_formal_ivar
-          Diagnostic.new(:error, :argument_ivar, {}, diagnostic_location, [])
-        when :argument_no_forwarding_amp
-          Diagnostic.new(:error, :no_anonymous_blockarg, {}, diagnostic_location, [])
-        when :argument_no_forwarding_star
-          Diagnostic.new(:error, :no_anonymous_restarg, {}, diagnostic_location, [])
-        when :argument_no_forwarding_star_star
-          Diagnostic.new(:error, :no_anonymous_kwrestarg, {}, diagnostic_location, [])
-        when :begin_lonely_else
-          location = location.copy(length: 4)
-          diagnostic_location = build_range(location, offset_cache)
-          Diagnostic.new(:error, :useless_else, {}, diagnostic_location, [])
-        when :class_name, :module_name
-          Diagnostic.new(:error, :module_name_const, {}, diagnostic_location, [])
-        when :class_in_method
-          Diagnostic.new(:error, :class_in_def, {}, diagnostic_location, [])
-        when :def_endless_setter
-          Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, [])
-        when :embdoc_term
-          Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, [])
-        when :incomplete_variable_class, :incomplete_variable_class_3_3
-          location = location.copy(length: location.length + 1)
-          diagnostic_location = build_range(location, offset_cache)
-
-          Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, [])
-        when :incomplete_variable_instance, :incomplete_variable_instance_3_3
-          location = location.copy(length: location.length + 1)
-          diagnostic_location = build_range(location, offset_cache)
-
-          Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, [])
-        when :invalid_variable_global, :invalid_variable_global_3_3
-          Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, [])
-        when :module_in_method
-          Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, [])
-        when :numbered_parameter_ordinary
-          Diagnostic.new(:error, :ordinary_param_defined, {}, diagnostic_location, [])
-        when :numbered_parameter_outer_scope
-          Diagnostic.new(:error, :numparam_used_in_outer_scope, {}, diagnostic_location, [])
-        when :parameter_circular
-          Diagnostic.new(:error, :circular_argument_reference, { var_name: location.slice }, diagnostic_location, [])
-        when :parameter_name_repeat
-          Diagnostic.new(:error, :duplicate_argument, {}, diagnostic_location, [])
-        when :parameter_numbered_reserved
-          Diagnostic.new(:error, :reserved_for_numparam, { name: location.slice }, diagnostic_location, [])
-        when :regexp_unknown_options
-          Diagnostic.new(:error, :regexp_options, { options: location.slice[1..] }, diagnostic_location, [])
-        when :singleton_for_literals
-          Diagnostic.new(:error, :singleton_literal, {}, diagnostic_location, [])
-        when :string_literal_eof
-          Diagnostic.new(:error, :string_eof, {}, diagnostic_location, [])
-        when :unexpected_token_ignore
-          Diagnostic.new(:error, :unexpected_token, { token: location.slice }, diagnostic_location, [])
-        when :write_target_in_method
-          Diagnostic.new(:error, :dynamic_const, {}, diagnostic_location, [])
-        else
-          PrismDiagnostic.new(error.message, :error, error.type, diagnostic_location)
-        end
-      end
-
-      # Build a diagnostic from the given prism parse warning.
-      def warning_diagnostic(warning, offset_cache)
-        diagnostic_location = build_range(warning.location, offset_cache)
-
-        case warning.type
-        when :ambiguous_first_argument_plus
-          Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "+" }, diagnostic_location, [])
-        when :ambiguous_first_argument_minus
-          Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "-" }, diagnostic_location, [])
-        when :ambiguous_prefix_ampersand
-          Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "&" }, diagnostic_location, [])
-        when :ambiguous_prefix_star
-          Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "*" }, diagnostic_location, [])
-        when :ambiguous_prefix_star_star
-          Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "**" }, diagnostic_location, [])
-        when :ambiguous_slash
-          Diagnostic.new(:warning, :ambiguous_regexp, {}, diagnostic_location, [])
-        when :dot_dot_dot_eol
-          Diagnostic.new(:warning, :triple_dot_at_eol, {}, diagnostic_location, [])
-        when :duplicated_hash_key
-          # skip, parser does this on its own
-        else
-          PrismDiagnostic.new(warning.message, :warning, warning.type, diagnostic_location)
-        end
-      end
-
-      # If there was a error generated during the parse, then raise an
-      # appropriate syntax error. Otherwise return the result.
-      def unwrap(result, offset_cache)
-        result.errors.each do |error|
-          next unless valid_error?(error)
-          diagnostics.process(error_diagnostic(error, offset_cache))
-        end
-
-        result.warnings.each do |warning|
-          next unless valid_warning?(warning)
-          diagnostic = warning_diagnostic(warning, offset_cache)
-          diagnostics.process(diagnostic) if diagnostic
-        end
-
-        result
-      end
-
-      # Prism deals with offsets in bytes, while the parser gem deals with
-      # offsets in characters. We need to handle this conversion in order to
-      # build the parser gem AST.
-      #
-      # If the bytesize of the source is the same as the length, then we can
-      # just use the offset directly. Otherwise, we build an array where the
-      # index is the byte offset and the value is the character offset.
-      def build_offset_cache(source)
-        if source.bytesize == source.length
-          -> (offset) { offset }
-        else
-          offset_cache = []
-          offset = 0
-
-          source.each_char do |char|
-            char.bytesize.times { offset_cache << offset }
-            offset += 1
-          end
-
-          offset_cache << offset
-        end
-      end
-
-      # Build the parser gem AST from the prism AST.
-      def build_ast(program, offset_cache)
-        program.accept(Compiler.new(self, offset_cache))
-      end
-
-      # Build the parser gem comments from the prism comments.
-      def build_comments(comments, offset_cache)
-        comments.map do |comment|
-          ::Parser::Source::Comment.new(build_range(comment.location, offset_cache))
-        end
-      end
-
-      # Build the parser gem tokens from the prism tokens.
-      def build_tokens(tokens, offset_cache)
-        Lexer.new(source_buffer, tokens, offset_cache).to_a
-      end
-
-      # Build a range from a prism location.
-      def build_range(location, offset_cache)
-        ::Parser::Source::Range.new(
-          source_buffer,
-          offset_cache[location.start_offset],
-          offset_cache[location.end_offset]
-        )
-      end
-
-      # Options for how prism should parse/lex the source.
-      def prism_options
-        options = {
-          filepath: @source_buffer.name,
-          version: convert_for_prism(version),
-          partial_script: true,
-        }
-        # The parser gem always encodes to UTF-8, unless it is binary.
-        # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107
-        options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY
-
-        options
-      end
-
-      # Converts the version format handled by Parser to the format handled by Prism.
-      def convert_for_prism(version)
-        case version
-        when 33
-          "3.3.1"
-        when 34
-          "3.4.0"
-        when 35
-          "3.5.0"
-        else
-          "latest"
-        end
-      end
-
-      require_relative "parser/builder"
-      require_relative "parser/compiler"
-      require_relative "parser/lexer"
-
-      private_constant :Compiler
-      private_constant :Lexer
     end
   end
 end
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
@@ -2,7 +2,7 @@
 
 module Prism
   module Translation
-    class Parser
+    class Parser < ParserBase
       # A builder that knows how to convert more modern Ruby syntax
       # into whitequark/parser gem's syntax tree.
       class Builder < ::Parser::Builders::Default