Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 44 additions & 13 deletions lib/ruby_indexer/lib/ruby_indexer/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ class Generic
# NOTE: We also define this in the shim
PARSER = const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER

# This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we
# do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the
# same to avoid inconsistencies in our URIs, which are used to identify resources
UNSAFE_REGEX = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]}

class << self
#: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic
def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
# This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we
# do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the
# same to avoid inconsistencies in our URIs, which are used to identify resources
unsafe_regex = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]}

def from_win_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
# On Windows, if the path begins with the disk name, we need to add a leading slash to make it a valid URI
escaped_path = if /^[A-Z]:/i.match?(path)
PARSER.escape("/#{path}", unsafe_regex)
PARSER.escape("/#{path}", UNSAFE_REGEX)
elsif path.start_with?("//?/")
# Some paths on Windows start with "//?/". This is a special prefix that allows for long file paths
PARSER.escape(path.delete_prefix("//?"), unsafe_regex)
PARSER.escape(path.delete_prefix("//?"), UNSAFE_REGEX)
else
PARSER.escape(path, unsafe_regex)
PARSER.escape(path, UNSAFE_REGEX)
end

uri = build(scheme: scheme, path: escaped_path, fragment: fragment)
Expand All @@ -38,6 +38,21 @@ def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)

uri
end

#: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic
def from_unix_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
escaped_path = PARSER.escape(path, UNSAFE_REGEX)

uri = build(scheme: scheme, path: escaped_path, fragment: fragment)

if load_path_entry
uri.require_path = path.delete_prefix("#{load_path_entry}/").delete_suffix(".rb")
end

uri
end

alias_method :from_path, Gem.win_platform? ? :from_win_path : :from_unix_path
end

#: String?
Expand All @@ -52,21 +67,37 @@ def add_require_path_from_load_entry(load_path_entry)
end

#: -> String?
def to_standardized_path
# On Windows, when we're getting the file system path back from the URI, we need to remove the leading forward
# slash
def to_standardized_win_path
parsed_path = path

return unless parsed_path

# we can bail out parsing if there is nothing to unescape
return parsed_path unless parsed_path.match?(/%[0-9A-Fa-f]{2}/)
Comment on lines +77 to +78
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can we avoid unescaping here?


unescaped_path = PARSER.unescape(parsed_path)

# On Windows, when we're getting the file system path back from the URI, we need to remove the leading forward
# slash
if %r{^/[A-Z]:}i.match?(unescaped_path)
unescaped_path.delete_prefix("/")
else
unescaped_path
end
end

alias_method :full_path, :to_standardized_path
#: -> String?
def to_standardized_unix_path
unescaped_path = path
return unless unescaped_path

# we can bail out parsing if there is nothing to be unescaped
return unescaped_path unless unescaped_path.match?(/%[0-9A-Fa-f]{2}/)
Comment on lines +94 to +95
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question, why does this not need to be escaped?


PARSER.unescape(unescaped_path)
end

alias_method :to_standardized_path, Gem.win_platform? ? :to_standardized_win_path : :to_standardized_unix_path
alias_method :full_path, Gem.win_platform? ? :to_standardized_win_path : :to_standardized_unix_path
end
end
30 changes: 15 additions & 15 deletions lib/ruby_indexer/test/uri_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,38 @@
module RubyIndexer
class URITest < Minitest::Test
def test_from_path_on_unix
uri = URI::Generic.from_path(path: "/some/unix/path/to/file.rb")
uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb")
assert_equal("/some/unix/path/to/file.rb", uri.path)
end

def test_from_path_on_windows
uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb")
uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb")
assert_equal("/C%3A/some/windows/path/to/file.rb", uri.path)
end

def test_from_path_on_windows_with_lowercase_drive
uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb")
uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb")
assert_equal("/c%3A/some/windows/path/to/file.rb", uri.path)
end

def test_to_standardized_path_on_unix
uri = URI::Generic.from_path(path: "/some/unix/path/to/file.rb")
assert_equal(uri.path, uri.to_standardized_path)
uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb")
assert_equal(uri.path, uri.to_standardized_win_path)
end

def test_to_standardized_path_on_windows
uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb")
assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_path)
uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb")
assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
end

def test_to_standardized_path_on_windows_with_lowercase_drive
uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb")
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path)
uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb")
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
end

def test_to_standardized_path_on_windows_with_received_uri
uri = URI("file:///c%3A/some/windows/path/to/file.rb")
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path)
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
end

def test_plus_signs_are_properly_unescaped
Expand All @@ -52,8 +52,8 @@ def test_from_path_with_fragment
end

def test_from_path_windows_long_file_paths
uri = URI::Generic.from_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb")
assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_path)
uri = URI::Generic.from_win_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb")
assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_win_path)
end

def test_from_path_computes_require_path_when_load_path_entry_is_given
Expand All @@ -70,14 +70,14 @@ def test_allows_adding_require_path_with_load_path_entry
end

def test_from_path_escapes_colon_characters
uri = URI::Generic.from_path(path: "c:/some/windows/path with/spaces/file.rb")
assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_path)
uri = URI::Generic.from_win_path(path: "c:/some/windows/path with/spaces/file.rb")
assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_win_path)
assert_equal("file:///c%3A/some/windows/path%20with/spaces/file.rb", uri.to_s)
end

def test_from_path_with_unicode_characters
path = "/path/with/unicode/文件.rb"
uri = URI::Generic.from_path(path: path)
uri = URI::Generic.from_unix_path(path: path)
assert_equal(path, uri.to_standardized_path)
assert_equal("file:///path/with/unicode/%E6%96%87%E4%BB%B6.rb", uri.to_s)
end
Expand Down
16 changes: 16 additions & 0 deletions sorbet/rbi/shims/uri.rbi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ module URI

class Generic
PARSER = T.let(const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER, RFC2396_Parser)

sig { returns(T.nilable(String)) }
def to_standardized_path; end

sig { returns(T.nilable(String)) }
def full_path; end

sig do
params(
path: String,
fragment: T.nilable(String),
scheme: String,
load_path_entry: T.nilable(String)
).returns(::URI::Generic)
end
def self.from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil); end
end

class File
Expand Down
Loading