class Parser
The Ruby source code parser.
Definitions
def initialize(language)
Initialize a new Ruby parser.
Signature
-
parameter
language
Language
The language instance.
Implementation
def initialize(language)
@language = language
@visibility = :public
@definitions = Hash.new.compare_by_identity
end
def definitions_for(source, &block)
Extract definitions from the given input file.
Implementation
def definitions_for(source, &block)
return enum_for(:definitions_for, source) unless block_given?
result = self.parse_source(source)
result.attach_comments!
# Pass the source to walk_definitions for location tracking
source = source.is_a?(Source) ? source : nil
walk_definitions(result.value, nil, source, &block)
end
def walk_definitions(node, parent = nil, source = nil, &block)
Walk over the syntax tree and extract relevant definitions with their associated comments.
Implementation
def walk_definitions(node, parent = nil, source = nil, &block)
# Check for scope definitions from comments
if node.comments.any?
parent = scope_for(comments_for(node), parent, &block) || parent
end
case node.type
when :program_node
with_visibility do
node.child_nodes.each do |child|
walk_definitions(child, parent, source, &block)
end
end
when :statements_node
node.child_nodes.each do |child|
walk_definitions(child, parent, source, &block)
end
when :block_node
if node.body
walk_definitions(node.body, parent, source, &block)
end
when :module_node
path = nested_path_for(node.constant_path)
definition = Module.new(path,
visibility: :public,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
source: source,
)
store_definition(parent, path.last.to_sym, definition)
yield definition
if body = node.body
with_visibility do
walk_definitions(body, definition, source, &block)
end
end
when :class_node
path = nested_path_for(node.constant_path)
super_class = nested_name_for(node.superclass)
definition = Class.new(path,
super_class: super_class,
visibility: :public,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
source: source,
)
store_definition(parent, path.last.to_sym, definition)
yield definition
if body = node.body
with_visibility do
walk_definitions(body, definition, source, &block)
end
end
when :singleton_class_node
if name = singleton_name_for(node)
definition = Singleton.new(name,
comments: comments_for(node),
parent: parent, language: @language, visibility: :public, source: source
)
yield definition
if body = node.body
walk_definitions(body, definition, source, &block)
end
end
when :def_node
receiver = receiver_for(node.receiver)
definition = Method.new(node.name,
visibility: @visibility,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
receiver: receiver,
source: source,
)
yield definition
when :constant_write_node
definition = Constant.new(node.name,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
)
store_definition(parent, node.name, definition)
yield definition
when :call_node
name = node.name
case name
when :public, :protected, :private
# Handle cases like "private def foo" where method definitions are arguments
if node.arguments
has_method_definitions = false
node.arguments.arguments.each do |arg_node|
if arg_node.type == :def_node
has_method_definitions = true
# Process the method definition with the specified visibility
receiver = receiver_for(arg_node.receiver)
definition = Method.new(arg_node.name,
visibility: name,
comments: comments_for(arg_node),
parent: parent,
node: arg_node,
language: @language,
receiver: receiver,
)
yield definition
end
end
# Only set visibility state if this is NOT an inline method definition
unless has_method_definitions
@visibility = name
end
else
# No arguments, so this is a standalone visibility modifier
@visibility = name
end
when :private_constant
if node.arguments
constant_names_for(node.arguments.arguments) do |name|
if definition = lookup_definition(parent, name)
definition.visibility = :private
end
end
end
when :attr, :attr_reader, :attr_writer, :attr_accessor
definition = Attribute.new(attribute_name_for(node),
comments: comments_for(node),
parent: parent, language: @language, node: node
)
yield definition
when :alias_method
# Handle alias_method :new_name, :old_name syntax
if node.arguments && node.arguments.arguments.size >= 2
new_name_arg = node.arguments.arguments[0]
old_name_arg = node.arguments.arguments[1]
# Extract symbol names from the arguments
new_name = symbol_name_for(new_name_arg)
old_name = symbol_name_for(old_name_arg)
definition = Alias.new(new_name.to_sym, old_name.to_sym,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
visibility: @visibility,
source: source,
)
yield definition
end
else
# Check if this call should be treated as a definition
# either because it has a @name comment, @attribute comment, or a block
has_name_comment = comments_for(node).any? { |comment| comment.match(NAME_ATTRIBUTE) }
has_attribute_comment = kind_for(node, comments_for(node))
has_block = node.block
if has_name_comment || has_attribute_comment || has_block
definition = Call.new(
attribute_name_for(node),
comments: comments_for(node),
parent: parent, language: @language, node: node
)
yield definition
# Walk into the block body if it exists
if node.block
walk_definitions(node.block, definition, source, &block)
end
end
end
when :alias_method_node
# Handle alias new_name old_name syntax
new_name = node.new_name.unescaped
old_name = node.old_name.unescaped
definition = Alias.new(new_name.to_sym, old_name.to_sym,
comments: comments_for(node),
parent: parent,
node: node,
language: @language,
visibility: @visibility,
source: source,
)
yield definition
else
if node.respond_to?(:statements)
walk_definitions(node.statements, parent, source, &block)
else
# $stderr.puts "Ignoring #{node.type}"
end
end
end
def segments_for(source, &block)
Extract segments from the given input file.
Implementation
def segments_for(source, &block)
result = self.parse_source(source)
comments = result.comments.reject do |comment|
comment.location.slice.start_with?("#!/") ||
comment.location.slice.start_with?("# frozen_string_literal:") ||
comment.location.slice.start_with?("# Released under the MIT License.") ||
comment.location.slice.start_with?("# Copyright,")
end
# Now we iterate over the syntax tree and generate segments:
walk_segments(result.value, comments, &block)
end
def comments_for(node)
Extract clean comment text from a node by removing leading # symbols and whitespace. Only returns comments that directly precede the node (i.e., are adjacent to it).
Signature
-
parameter
node
Node
The AST node with comments.
-
returns
Array
Array of cleaned comment strings.
Implementation
def comments_for(node)
# Find the node's starting line
node_start_line = node.location.start_line
# Filter comments to only include those that directly precede the node
# We work backwards from the line before the node to find consecutive comments
adjacent_comments = []
expected_line = node_start_line - 1
# Process comments in reverse order to work backwards from the node
node.comments.reverse_each do |comment|
comment_line = comment.location.start_line
# If this comment is on the expected line, it's adjacent
if comment_line == expected_line
adjacent_comments.unshift(comment)
expected_line = comment_line - 1
elsif comment_line < expected_line
# If we hit a comment that's too far back, stop
break
end
# If comment_line > expected_line, skip it (it's not adjacent)
end
# Clean and return the adjacent comments
adjacent_comments.map do |comment|
text = comment.slice
# Remove leading # and optional whitespace
text.sub(/\A\#\s?/, "")
end
end
def parse_source(source)
Parse the given source object, can be a string or a Source instance.
Signature
-
parameter
source
String | Source
The source to parse.
Implementation
def parse_source(source)
if source.is_a?(Source)
Prism.parse(source.read, filepath: source.path)
else
Prism.parse(source)
end
end