Skip to content

Commit

Permalink
Merge pull request #384 from DataDog/feature/add_http_url_quantizer
Browse files Browse the repository at this point in the history
Add HTTP url quantizer
  • Loading branch information
delner authored Mar 27, 2018
2 parents c86ccc1 + 3204d8a commit 8ba47ae
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/ddtrace.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require 'ddtrace/pin'
require 'ddtrace/tracer'
require 'ddtrace/error'
require 'ddtrace/quantization/http'
require 'ddtrace/pipeline'
require 'ddtrace/configuration'
require 'ddtrace/patcher'
Expand Down
86 changes: 86 additions & 0 deletions lib/ddtrace/quantization/http.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
require 'uri'
require 'set'

module Datadog
  module Quantization
    # Quantization for HTTP resources.
    #
    # Reduces the cardinality of URLs and query strings by dropping query
    # values (keeping only the keys) and URI fragments, unless explicitly
    # asked to show them.
    module HTTP
      # Returned by the non-bang methods when quantization fails.
      PLACEHOLDER = '?'.freeze

      module_function

      # Quantizes a URL without raising.
      #
      # url     - URL string to quantize.
      # options - Hash (or nil) accepting:
      #           :query       - options forwarded to #query (see #query!).
      #           :fragment    - :show to keep the URI fragment.
      #           :placeholder - value returned if quantization fails.
      #
      # Returns the quantized URL string, or the placeholder if any
      # StandardError was raised while processing.
      def url(url, options = {})
        url!(url, options)
      rescue StandardError
        # options may legitimately be nil (url! accepts it), so guard the lookup.
        (options || {})[:placeholder] || PLACEHOLDER
      end

      # Quantizes a URL, letting parse errors propagate.
      #
      # Takes the same arguments as #url. Raises whatever URI.parse or the
      # URI setters raise for the given input.
      def url!(url, options = {})
        options ||= {}

        URI.parse(url).tap do |uri|
          # Format the query string
          if uri.query
            quantized = query(uri.query, options[:query])
            # An empty result means every parameter was removed: drop the "?" too.
            uri.query = (quantized.nil? || quantized.empty? ? nil : quantized)
          end

          # Remove any URI fragments
          uri.fragment = nil unless options[:fragment] == :show
        end.to_s
      end

      # Quantizes a query string without raising.
      #
      # Takes the same arguments as #query!, plus :placeholder, the value
      # returned if quantization fails (defaults to PLACEHOLDER).
      def query(query, options = {})
        query!(query, options)
      rescue StandardError
        # options may legitimately be nil (query! accepts it), so guard the lookup.
        (options || {})[:placeholder] || PLACEHOLDER
      end

      # Quantizes a query string, letting errors propagate.
      #
      # query   - raw query string, e.g. "foo=1&bar=2".
      # options - Hash (or nil) accepting:
      #           :show    - collection of keys whose values are kept, or :all
      #                      to return the query untouched.
      #           :exclude - collection of keys to drop entirely, or :all to
      #                      return an empty string.
      #
      # The options hash is only read, never modified, so frozen or shared
      # hashes are safe to pass.
      def query!(query, options = {})
        options ||= {}
        show = options[:show] || []
        exclude = options[:exclude] || []

        # Short circuit if query string is meant to exclude everything
        # or if the query string is meant to include everything
        return '' if exclude == :all
        return query if show == :all

        collect_query(query, uniq: true) do |key, value|
          if exclude.include?(key)
            [nil, nil]
          else
            [key, show.include?(key) ? value : nil]
          end
        end
      end

      # Iterate over each key value pair, yielding to the block given.
      # The block receives (key, value, delimiter) and returns the
      # [key, value] pair to emit; a nil key drops the pair, a nil value
      # emits the key alone.
      # Accepts :uniq option, which keeps uniq copies of keys without values.
      # e.g. Reduces "foo&bar=bar&bar=bar&foo" to "foo&bar=bar&bar=bar"
      def collect_query(query, options = {})
        return query unless block_given?
        uniq = options[:uniq].nil? ? false : options[:uniq]
        keys = Set.new

        # Record the delimiter preceding each pair so it can be re-emitted
        # as-is. \A (not ^) is required here: ^ also matches after newlines,
        # which would misalign delimiters with the pairs produced by split.
        delims = query.scan(/(\A|&|;)/).flatten
        query.split(/[&;]/).each_with_index.map do |pair, i|
          key, value = pair.split('=', 2)
          key, value = yield(key, value, delims[i])
          if uniq && keys.include?(key)
            ''
          elsif key && value
            "#{delims[i]}#{key}=#{value}"
          elsif key
            "#{delims[i]}#{key}".tap { keys << key }
          else
            ''
          end
        end.join.sub(/\A[&;]/, '') # Strip the delimiter left behind by a dropped first pair
      end

      private_class_method :collect_query
    end
  end
end
228 changes: 228 additions & 0 deletions spec/ddtrace/quantization/http_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
# encoding: utf-8
require 'spec_helper'

require 'ddtrace/quantization/http'

# Specs for Datadog::Quantization::HTTP, covering the non-raising entry
# points (.url and .query) and their :show / :exclude / :fragment options.
RSpec.describe Datadog::Quantization::HTTP do
  describe '#url' do
    subject(:result) { described_class.url(url, options) }
    let(:options) { {} }

    context 'given a URL' do
      let(:url) { 'http://example.com/path?category_id=1&sort_by=asc#featured' }

      context 'default behavior' do
        it { is_expected.to eq('http://example.com/path?category_id&sort_by') }
      end

      context 'default behavior for an array' do
        let(:url) { 'http://example.com/path?categories[]=1&categories[]=2' }
        it { is_expected.to eq('http://example.com/path?categories[]') }
      end

      context 'with query: show: value' do
        let(:options) { { query: { show: ['category_id'] } } }
        it { is_expected.to eq('http://example.com/path?category_id=1&sort_by') }
      end

      context 'with query: show: :all' do
        let(:options) { { query: { show: :all } } }
        it { is_expected.to eq('http://example.com/path?category_id=1&sort_by=asc') }
      end

      context 'with query: exclude: value' do
        let(:options) { { query: { exclude: ['sort_by'] } } }
        it { is_expected.to eq('http://example.com/path?category_id') }
      end

      context 'with query: exclude: :all' do
        let(:options) { { query: { exclude: :all } } }
        it { is_expected.to eq('http://example.com/path') }
      end

      # Keeps the URI fragment, which is stripped by default.
      context 'with fragment: :show' do
        let(:options) { { fragment: :show } }
        it { is_expected.to eq('http://example.com/path?category_id&sort_by#featured') }
      end

      context 'with Unicode characters' do
        # URLs do not permit unencoded non-ASCII characters in the URL.
        let(:url) { "http://example.com/path?繋がってて" }
        it { is_expected.to eq(described_class::PLACEHOLDER) }
      end
    end
  end

  describe '#query' do
    subject(:result) { described_class.query(query, options) }

    context 'given a query' do
      context 'and no options' do
        let(:options) { {} }

        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          it { is_expected.to eq('foo') }

          context 'with an invalid byte sequence' do
            # \255 is off-limits https://en.wikipedia.org/wiki/UTF-8#Codepage_layout
            # There isn't a graceful way to handle this without stripping interesting
            # characters out either; so just raise an error and default to the placeholder.
            let(:query) { "foo\255=foo" }
            it { is_expected.to eq('?') }
          end
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          it { is_expected.to eq('foo&bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          it { is_expected.to eq('foo[]') }
        end

        context 'with semi-colon style parameters' do
          let(:query) { 'foo;bar' }
          # The semicolon delimiter is carried through to the result unchanged
          # (it is not normalized to "&").
          # Semicolons are illegal as of 2014... so this is an edge case.
          # See https://www.w3.org/TR/2014/REC-html5-20141028/forms.html#url-encoded-form-data
          it { is_expected.to eq('foo;bar') }
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          it { is_expected.to eq('user[id]&user[name]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            it { is_expected.to eq('users[][id]&users[][name]') }
          end
        end
      end

      context 'and a show: :all option' do
        let(:query) { 'foo=foo&bar=bar' }
        let(:options) { { show: :all } }
        it { is_expected.to eq(query) }
      end

      context 'and a show option' do
        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          let(:key) { 'foo' }
          let(:options) { { show: [key] } }
          it { is_expected.to eq('foo=foo') }

          context 'that has a Unicode key' do
            let(:query) { '繋=foo' }
            let(:key) { '繋' }
            it { is_expected.to eq('繋=foo') }

            context 'that is encoded' do
              let(:query) { '%E7%B9%8B=foo' }
              let(:key) { '%E7%B9%8B' }
              it { is_expected.to eq('%E7%B9%8B=foo') }
            end
          end

          context 'that has a Unicode value' do
            let(:query) { 'foo=繋' }
            let(:key) { 'foo' }
            it { is_expected.to eq('foo=繋') }

            context 'that is encoded' do
              let(:query) { 'foo=%E7%B9%8B' }
              it { is_expected.to eq('foo=%E7%B9%8B') }
            end
          end

          context 'that has a Unicode key and value' do
            let(:query) { '繋=繋' }
            let(:key) { '繋' }
            it { is_expected.to eq('繋=繋') }

            context 'that is encoded' do
              let(:query) { '%E7%B9%8B=%E7%B9%8B' }
              let(:key) { '%E7%B9%8B' }
              it { is_expected.to eq('%E7%B9%8B=%E7%B9%8B') }
            end
          end
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          let(:options) { { show: ['foo'] } }
          it { is_expected.to eq('foo=foo&bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          let(:options) { { show: ['foo[]'] } }
          it { is_expected.to eq('foo[]=bar&foo[]=baz') }

          context 'that contains encoded braces' do
            let(:query) { 'foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D' }
            it { is_expected.to eq('foo[]=%5Bbar%5D&foo[]=%5Bbaz%5D') }

            context 'that exactly matches the key' do
              let(:query) { 'foo[]=foo%5B%5D&foo[]=foo%5B%5D' }
              it { is_expected.to eq('foo[]=foo%5B%5D&foo[]=foo%5B%5D') }
            end
          end
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          let(:options) { { show: ['user[id]'] } }
          it { is_expected.to eq('user[id]=1&user[name]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            let(:options) { { show: ['users[][id]'] } }
            # Shown keys keep every occurrence; hidden keys are de-duplicated.
            it { is_expected.to eq('users[][id]=1&users[][name]&users[][id]=2') }
          end
        end
      end

      context 'and an exclude: :all option' do
        let(:query) { 'foo=foo&bar=bar' }
        let(:options) { { exclude: :all } }
        it { is_expected.to eq('') }
      end

      context 'and an exclude option' do
        context 'with a single parameter' do
          let(:query) { 'foo=foo' }
          let(:options) { { exclude: ['foo'] } }
          it { is_expected.to eq('') }
        end

        context 'with multiple parameters' do
          let(:query) { 'foo=foo&bar=bar' }
          let(:options) { { exclude: ['foo'] } }
          it { is_expected.to eq('bar') }
        end

        context 'with array-style parameters' do
          let(:query) { 'foo[]=bar&foo[]=baz' }
          let(:options) { { exclude: ['foo[]'] } }
          it { is_expected.to eq('') }
        end

        context 'with object-style parameters' do
          let(:query) { 'user[id]=1&user[name]=Nathan' }
          let(:options) { { exclude: ['user[name]'] } }
          it { is_expected.to eq('user[id]') }

          context 'that are complex' do
            let(:query) { 'users[][id]=1&users[][name]=Nathan&users[][id]=2&users[][name]=Emma' }
            let(:options) { { exclude: ['users[][name]'] } }
            it { is_expected.to eq('users[][id]') }
          end
        end
      end
    end
  end
end

0 comments on commit 8ba47ae

Please sign in to comment.