Skip to content

Commit e87f701

Browse files
committed
Make URI parsing more platform aware
1 parent 159e8c9 commit e87f701

File tree

3 files changed

+75
-28
lines changed

3 files changed

+75
-28
lines changed

lib/ruby_indexer/lib/ruby_indexer/uri.rb

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,22 @@ class Generic
1212
# NOTE: We also define this in the shim
1313
PARSER = const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER
1414

15+
# This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we
16+
# do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the
17+
# same to avoid inconsistencies in our URIs, which are used to identify resources
18+
UNSAFE_REGEX = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]}
19+
1520
class << self
1621
#: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic
17-
def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
18-
# This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we
19-
# do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the
20-
# same to avoid inconsistencies in our URIs, which are used to identify resources
21-
unsafe_regex = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]}
22-
22+
def from_win_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
2323
# On Windows, if the path begins with the disk name, we need to add a leading slash to make it a valid URI
2424
escaped_path = if /^[A-Z]:/i.match?(path)
25-
PARSER.escape("/#{path}", unsafe_regex)
25+
PARSER.escape("/#{path}", UNSAFE_REGEX)
2626
elsif path.start_with?("//?/")
2727
# Some paths on Windows start with "//?/". This is a special prefix that allows for long file paths
28-
PARSER.escape(path.delete_prefix("//?"), unsafe_regex)
28+
PARSER.escape(path.delete_prefix("//?"), UNSAFE_REGEX)
2929
else
30-
PARSER.escape(path, unsafe_regex)
30+
PARSER.escape(path, UNSAFE_REGEX)
3131
end
3232

3333
uri = build(scheme: scheme, path: escaped_path, fragment: fragment)
@@ -38,6 +38,21 @@ def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
3838

3939
uri
4040
end
41+
42+
#: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic
43+
# Builds a URI for a Unix file system path. Every character matching UNSAFE_REGEX is percent-escaped before the
# URI is built. When `load_path_entry` is given, the URI's require path is set to `path` with the
# "<load_path_entry>/" prefix and the ".rb" suffix removed.
def from_unix_path(path:, fragment: nil, scheme: "file", load_path_entry: nil)
  build(scheme: scheme, path: PARSER.escape(path, UNSAFE_REGEX), fragment: fragment).tap do |uri|
    next unless load_path_entry

    uri.require_path = path.delete_prefix("#{load_path_entry}/").delete_suffix(".rb")
  end
end
54+
55+
# The platform check runs once, when this file is loaded: `from_path` becomes an alias for the Windows or the
# Unix implementation depending on what Gem.win_platform? reports
alias_method :from_path, Gem.win_platform? ? :from_win_path : :from_unix_path
4156
end
4257

4358
#: String?
@@ -52,21 +67,37 @@ def add_require_path_from_load_entry(load_path_entry)
5267
end
5368

5469
#: -> String?
55-
def to_standardized_path
70+
# On Windows, when we're getting the file system path back from the URI, we need to remove the leading forward
71+
# slash
72+
def to_standardized_win_path
  parsed_path = path
  return unless parsed_path

  # Skip the unescape pass when the path has no percent-encoded sequence, but do NOT return early: a client that
  # leaves the colon unescaped produces paths like "/C:/foo", which still need the drive-letter handling below
  unescaped_path = parsed_path.match?(/%[0-9A-Fa-f]{2}/) ? PARSER.unescape(parsed_path) : parsed_path

  # A leading slash followed by a drive letter ("/C:") is a URI artifact, not part of the file system path.
  # Anchor with \A so that only the very start of the string can match
  if %r{\A/[A-Z]:}i.match?(unescaped_path)
    unescaped_path.delete_prefix("/")
  else
    unescaped_path
  end
end
6988

70-
alias_method :full_path, :to_standardized_path
89+
#: -> String?
90+
def to_standardized_unix_path
  raw_path = path
  return unless raw_path

  # Only run the unescape pass when the path contains at least one percent-encoded sequence
  if raw_path.match?(/%[0-9A-Fa-f]{2}/)
    PARSER.unescape(raw_path)
  else
    raw_path
  end
end
99+
100+
# Pick the platform-specific implementation once, when this file is loaded. `full_path` is another name for the
# same method, so it is aliased to `to_standardized_path` rather than repeating the platform check
alias_method :to_standardized_path, Gem.win_platform? ? :to_standardized_win_path : :to_standardized_unix_path
alias_method :full_path, :to_standardized_path
71102
end
72103
end

lib/ruby_indexer/test/uri_test.rb

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,38 @@
66
module RubyIndexer
77
class URITest < Minitest::Test
88
def test_from_path_on_unix
9-
uri = URI::Generic.from_path(path: "/some/unix/path/to/file.rb")
9+
uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb")
1010
assert_equal("/some/unix/path/to/file.rb", uri.path)
1111
end
1212

1313
def test_from_path_on_windows
14-
uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb")
14+
uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb")
1515
assert_equal("/C%3A/some/windows/path/to/file.rb", uri.path)
1616
end
1717

1818
def test_from_path_on_windows_with_lowercase_drive
19-
uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb")
19+
uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb")
2020
assert_equal("/c%3A/some/windows/path/to/file.rb", uri.path)
2121
end
2222

2323
# A path built with the Unix constructor must round-trip through the Unix standardizer unchanged
def test_to_standardized_path_on_unix
  uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb")
  assert_equal(uri.path, uri.to_standardized_unix_path)
end
2727

2828
def test_to_standardized_path_on_windows
29-
uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb")
30-
assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_path)
29+
uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb")
30+
assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
3131
end
3232

3333
def test_to_standardized_path_on_windows_with_lowercase_drive
34-
uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb")
35-
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path)
34+
uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb")
35+
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
3636
end
3737

3838
def test_to_standardized_path_on_windows_with_received_uri
3939
uri = URI("file:///c%3A/some/windows/path/to/file.rb")
40-
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path)
40+
assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path)
4141
end
4242

4343
def test_plus_signs_are_properly_unescaped
@@ -52,8 +52,8 @@ def test_from_path_with_fragment
5252
end
5353

5454
def test_from_path_windows_long_file_paths
55-
uri = URI::Generic.from_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb")
56-
assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_path)
55+
uri = URI::Generic.from_win_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb")
56+
assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_win_path)
5757
end
5858

5959
def test_from_path_computes_require_path_when_load_path_entry_is_given
@@ -70,14 +70,14 @@ def test_allows_adding_require_path_with_load_path_entry
7070
end
7171

7272
def test_from_path_escapes_colon_characters
73-
uri = URI::Generic.from_path(path: "c:/some/windows/path with/spaces/file.rb")
74-
assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_path)
73+
uri = URI::Generic.from_win_path(path: "c:/some/windows/path with/spaces/file.rb")
74+
assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_win_path)
7575
assert_equal("file:///c%3A/some/windows/path%20with/spaces/file.rb", uri.to_s)
7676
end
7777

7878
def test_from_path_with_unicode_characters
7979
path = "/path/with/unicode/文件.rb"
80-
uri = URI::Generic.from_path(path: path)
80+
uri = URI::Generic.from_unix_path(path: path)
8181
assert_equal(path, uri.to_standardized_path)
8282
assert_equal("file:///path/with/unicode/%E6%96%87%E4%BB%B6.rb", uri.to_s)
8383
end

sorbet/rbi/shims/uri.rbi

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,22 @@ module URI
55

66
class Generic
77
PARSER = T.let(const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER, RFC2396_Parser)
8+
9+
sig { returns(T.nilable(String)) }
10+
def to_standardized_path; end
11+
12+
sig { returns(T.nilable(String)) }
13+
def full_path; end
14+
15+
sig do
16+
params(
17+
path: String,
18+
fragment: T.nilable(String),
19+
scheme: String,
20+
load_path_entry: T.nilable(String)
21+
).returns(::URI::Generic)
22+
end
23+
def self.from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil); end
824
end
925

1026
class File

0 commit comments

Comments
 (0)