Skip to content

Commit

Permalink
Remove invalid UTF-8 characters from nuspec response body
Browse files Browse the repository at this point in the history
  • Loading branch information
JamieMagee committed Jan 29, 2024
1 parent aca169f commit f087f20
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
9 changes: 6 additions & 3 deletions nuget/lib/dependabot/nuget/update_checker/nuspec_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def self.fetch_nuspec_from_repository(repository_details, package_id, package_ve

return unless nuspec_response.status == 200

nuspec_response_body = remove_wrapping_zero_width_chars(nuspec_response.body)
nuspec_response_body = remove_invalid_characters(nuspec_response.body)
nuspec_xml = Nokogiri::XML(nuspec_response_body)
else
# no guarantee we can directly query the .nuspec; fall back to extracting it from the .nupkg
Expand Down Expand Up @@ -75,8 +75,11 @@ def self.extract_nuspec(zip_stream, package_id)
nil
end

def self.remove_wrapping_zero_width_chars(string)
string.force_encoding("UTF-8").encode
# Returns a sanitized copy of +string+; the argument itself is never mutated.
#
# The copy is reinterpreted as UTF-8, every byte sequence that is not valid
# UTF-8 is dropped, and any run of zero-width characters (U+200B..U+200D) or
# byte order marks (U+FEFF) wrapping the content is removed. Zero-width
# characters in the middle of the text are left alone.
#
# @param string [String] text to sanitize, in any encoding (may be frozen)
# @return [String] the sanitized copy
def self.remove_invalid_characters(string)
  string.dup # work on a copy so frozen or shared input is untouched
        .force_encoding(Encoding::UTF_8)
        .encode # transcodes only when Encoding.default_internal is set
        .scrub("")
        # `+` strips a whole run (e.g. a doubled BOM), not just one character.
        .sub(/\A[\u200B-\u200D\uFEFF]+/, "")
        # \Z (not \z) also matches just before a single trailing newline.
        .sub(/[\u200B-\u200D\uFEFF]+\Z/, "")
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@
it { is_expected.to be_falsy }
end
end

# Verifies that remove_invalid_characters drops bytes that are not valid UTF-8.
describe "remove_invalid_characters" do
  context "when a utf-16 bom is present" do
    subject(:result) do
      described_class.remove_invalid_characters(response_body)
    end

    # \xFE\xFF is the big-endian UTF-16 byte order mark; neither byte is
    # valid UTF-8 on its own.
    let(:response_body) do
      "\xFE\xFF<xml></xml>"
    end

    it { is_expected.to eq("<xml></xml>") }
  end
end
end

0 comments on commit f087f20

Please sign in to comment.