Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix encoding of streamed chunk #644

Merged
merged 4 commits into from
Feb 11, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/httparty.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
require 'httparty/version'
require 'httparty/connection_adapter'
require 'httparty/logger/logger'
require 'httparty/request/body'
require 'httparty/fragment_with_response'
require 'httparty/text_encoder'

# @see HTTParty::ClassMethods
module HTTParty
Expand Down
82 changes: 12 additions & 70 deletions lib/httparty/request.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
require 'erb'
require 'httparty/request/body'
require 'httparty/fragment_with_response'

module HTTParty
class Request #:nodoc:
Expand Down Expand Up @@ -148,15 +146,15 @@ def perform(&block)
chunks = []

http_response.read_body do |fragment|
chunks << fragment unless options[:stream_body]
block.call FragmentWithResponse.new(fragment, http_response)
encoded_fragment = encode_text(fragment, http_response['content-type'])
chunks << encoded_fragment if !options[:stream_body]
block.call FragmentWithResponse.new(encoded_fragment, http_response)
end

chunked_body = chunks.join
end
end


handle_host_redirection if response_redirects?
result = handle_unauthorized
result ||= handle_response(chunked_body, &block)
Expand Down Expand Up @@ -273,74 +271,10 @@ def query_string(uri)
query_string_parts.size > 0 ? query_string_parts.join('&') : nil
end

def get_charset
content_type = last_response["content-type"]
if content_type.nil?
return nil
end

if content_type =~ /;\s*charset\s*=\s*([^=,;"\s]+)/i
return $1
end

if content_type =~ /;\s*charset\s*=\s*"((\\.|[^\\"])+)"/i
return $1.gsub(/\\(.)/, '\1')
end

nil
end

def encode_with_ruby_encoding(body, charset)
# NOTE: This will raise an argument error if the
# charset does not exist
encoding = Encoding.find(charset)
body.force_encoding(encoding.to_s)
rescue ArgumentError
body
end

def assume_utf16_is_big_endian
options[:assume_utf16_is_big_endian]
end

def encode_utf_16(body)
if body.bytesize >= 2
if body.getbyte(0) == 0xFF && body.getbyte(1) == 0xFE
return body.force_encoding("UTF-16LE")
elsif body.getbyte(0) == 0xFE && body.getbyte(1) == 0xFF
return body.force_encoding("UTF-16BE")
end
end

if assume_utf16_is_big_endian
body.force_encoding("UTF-16BE")
else
body.force_encoding("UTF-16LE")
end
end

def _encode_body(body)
charset = get_charset

if charset.nil?
return body
end

if "utf-16".casecmp(charset) == 0
encode_utf_16(body)
else
encode_with_ruby_encoding(body, charset)
end
end

def encode_body(body)
if "".respond_to?(:encoding)
_encode_body(body)
else
body
end
end

def handle_response(body, &block)
if response_redirects?
options[:limit] -= 1
Expand All @@ -363,7 +297,7 @@ def handle_response(body, &block)
perform(&block)
else
body ||= last_response.body
body = body.nil? ? body : encode_body(body)
body = body.nil? ? body : encode_text(body, last_response['content-type'])
Response.new(self, last_response, lambda { parse_response(body) }, body: body)
end
end
Expand Down Expand Up @@ -439,5 +373,13 @@ def set_basic_auth_from_uri
@credentials_sent = true
end
end

def encode_text(text, content_type)
TextEncoder.new(
text: text,
content_type: content_type,
assume_utf16_is_big_endian: assume_utf16_is_big_endian
).call
end
end
end
70 changes: 70 additions & 0 deletions lib/httparty/text_encoder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
module HTTParty
class TextEncoder
attr_reader :text, :content_type, :assume_utf16_is_big_endian

def initialize(text:, assume_utf16_is_big_endian:, content_type:)
@text = text
@content_type = content_type
@assume_utf16_is_big_endian = assume_utf16_is_big_endian
end

def call
if can_encode?
encoded_text
else
text
end
end

private

def can_encode?
''.respond_to?(:encoding) && charset
end

def encoded_text
if 'utf-16'.casecmp(charset) == 0
encode_utf_16
else
encode_with_ruby_encoding
end
end

def encode_utf_16
if text.bytesize >= 2
if text.getbyte(0) == 0xFF && text.getbyte(1) == 0xFE
return text.force_encoding("UTF-16LE")
elsif text.getbyte(0) == 0xFE && text.getbyte(1) == 0xFF
return text.force_encoding("UTF-16BE")
end
end

if assume_utf16_is_big_endian # option
text.force_encoding("UTF-16BE")
else
text.force_encoding("UTF-16LE")
end
end

def encode_with_ruby_encoding
# NOTE: This will raise an argument error if the
# charset does not exist
encoding = Encoding.find(charset)
text.force_encoding(encoding.to_s)
rescue ArgumentError
text
end

def charset
return nil if content_type.nil?

if content_type =~ /;\s*charset\s*=\s*([^=,;"\s]+)/i
return $1 # FIX ME
end

if content_type =~ /;\s*charset\s*=\s*"((\\.|[^\\"])+)"/i
return $1.gsub(/\\(.)/, '\1') # PLEASE
end
end
end
end
24 changes: 10 additions & 14 deletions spec/httparty/request_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -505,39 +505,35 @@
end

if "".respond_to?(:encoding)

let(:response_charset) {
@request.send(:get_charset)
}

it "should process charset in content type properly" do
response = stub_response "Content".force_encoding('ascii-8bit')
response.initialize_http_header("Content-Type" => "text/plain;charset = utf-8")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-8"))
end

it "should process charset in content type properly if it has a different case" do
response = stub_response "Content".force_encoding('ascii-8bit')
response.initialize_http_header("Content-Type" => "text/plain;CHARSET = utf-8")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-8"))
end

it "should process quoted charset in content type properly" do
response = stub_response "Content".force_encoding('ascii-8bit')
response.initialize_http_header("Content-Type" => "text/plain;charset = \"utf-8\"")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-8"))
end

it "should process response with a nil body" do
response = stub_response nil
response.initialize_http_header("Content-Type" => "text/html;charset=UTF-8")
resp = @request.perform

expect(resp.body).to be_nil
end

Expand All @@ -547,7 +543,7 @@
response = stub_response "\xFF\xFEC\x00o\x00n\x00t\x00e\x00n\x00t\x00"
response.initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-16LE"))
end

Expand All @@ -557,7 +553,7 @@
response = stub_response "\xFE\xFF\x00C\x00o\x00n\x00t\x00e\x00n\x00t"
response.initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-16BE"))
end

Expand All @@ -567,17 +563,17 @@
response = stub_response "C\x00o\x00n\x00t\x00e\x00n\x00t\x00"
response.initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
resp = @request.perform
expect(response_charset).to_not be_empty

expect(resp.body.encoding).to eq(Encoding.find("UTF-16LE"))
end

it "should perform no encoding if the charset is not available" do
response = stub_response "Content"
response.initialize_http_header("Content-Type" => "text/plain;charset = utf-lols")
resp = @request.perform
expect(response_charset).to_not be_empty

# This encoding does not exist, thus the string should not be encodd with it
expect(resp.body.encoding).to_not eq(response_charset)

expect(resp.body).to eq("Content")
expect(resp.body.encoding).to eq("Content".encoding)
end
Expand All @@ -586,7 +582,7 @@
response = stub_response "Content"
response.initialize_http_header("Content-Type" => "text/plain")
resp = @request.perform
expect(response_charset).to be_nil

expect(resp.body).to eq("Content")
expect(resp.body.encoding).to eq("Content".encoding)
end
Expand Down
16 changes: 16 additions & 0 deletions spec/httparty_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,22 @@ def self.inherited(subclass)
).to eq(nil)
end

context 'when streaming body' do
let(:chunk) { 'Content'.force_encoding('ascii-8bit') }
let(:options) { { stream_body: true } }
before do
stub_chunked_http_response_with([chunk], options) do |response|
allow(response).to receive(:[]).with('content-type').and_return('text/plain; charset = "utf-8"')
end
end

specify do
HTTParty.get('http://www.google.com', options) do |fragment|
expect(fragment.encoding).to eq(Encoding.find("UTF-8"))
end
end
end

it "should be able parse response type json automatically" do
stub_http_response_with('twitter.json')
tweets = HTTParty.get('http://twitter.com/statuses/public_timeline.json')
Expand Down
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
require "simplecov"
SimpleCov.start
require 'pry'

require "httparty"
require 'webmock/rspec'
Expand Down
1 change: 1 addition & 0 deletions spec/support/stub_response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def stub_chunked_http_response_with(chunks, options = {format: "html"})
def response.read_body(&block)
@body || chunked_data.each(&block)
end
yield(response) if block_given?

http_request = HTTParty::Request.new(Net::HTTP::Get, 'http://localhost', options)
allow(http_request).to receive_message_chain(:http, :request).and_yield(response).and_return(response)
Expand Down