From fb08c6ca3e039412be0263e973d22f860b0a8c74 Mon Sep 17 00:00:00 2001
From: Loren Segal
Date: Tue, 3 Sep 2024 01:48:50 -0700
Subject: [PATCH] Fix processing of utf-8 files

Fixes #1517
---
 lib/yard/parser/source_parser.rb  | 3 ++-
 spec/parser/source_parser_spec.rb | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/yard/parser/source_parser.rb b/lib/yard/parser/source_parser.rb
index d42ea6f70..82dd90f69 100644
--- a/lib/yard/parser/source_parser.rb
+++ b/lib/yard/parser/source_parser.rb
@@ -107,7 +107,8 @@ def parse(paths = DEFAULT_PATH_GLOB, excluded = [], level = log.level)
           files = [paths].flatten.
             map {|p| File.directory?(p) ? "#{p}/**/*.{rb,c,cc,cxx,cpp}" : p }.
             map {|p| p.include?("*") ? Dir[p].sort_by {|d| [d.length, d] } : p }.flatten.
-            reject {|p| !File.file?(p) || excluded.any? {|re| p =~ re } }
+            reject {|p| !File.file?(p) || excluded.any? {|re| p =~ re } }.
+            map {|p| p.encoding == Encoding.default_external ? p : p.dup.force_encoding(Encoding.default_external) }
 
           log.enter_level(level) do
             parse_in_order(*files.uniq)
diff --git a/spec/parser/source_parser_spec.rb b/spec/parser/source_parser_spec.rb
index b0cc371ac..e88a554e9 100644
--- a/spec/parser/source_parser_spec.rb
+++ b/spec/parser/source_parser_spec.rb
@@ -499,6 +499,15 @@ class Foo < Bar
       YARD.parse ['foo']
     end
 
+    it "converts globs into UTF-8" do
+      expect(Dir).to receive(:[]).with('lib/**/*.rb').and_return(['lib/é.rb'])
+      expect(File).to receive(:file?).with('lib/é.rb').and_return(true)
+      expect(File).to receive(:read_binary).with('lib/é.rb').and_return("class A; end")
+
+      YARD.parse ['lib/**/*.rb']
+      expect(Registry.at('A')).not_to be nil
+    end
+
     it "uses Registry.checksums cache if file is cached" do
       data = 'DATA'
       hash = Registry.checksum_for(data)