HBASE-11676 Scan FORMATTER is not applied for columns using non-printable name in shell #2161

Merged · 1 commit · Jul 29, 2020
89 changes: 71 additions & 18 deletions hbase-shell/src/main/ruby/hbase/table.rb
@@ -449,18 +449,23 @@ def _get_internal(row, *args)
# Print out results. Result can be Cell or RowResult.
res = {}
result.listCells.each do |c|
family = convert_bytes_with_position(c.getFamilyArray,
c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
qualifier = convert_bytes_with_position(c.getQualifierArray,
c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
# Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
# column is constructed in this consistent way so that it can be used as a key.
family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
column = "#{family_bytes}:#{qualifier_bytes}"

column = "#{family}:#{qualifier}"
value = to_string(column, c, maxlength, converter_class, converter)

# Use the FORMATTER to determine how column is printed
family = convert_bytes(family_bytes, converter_class, converter)
qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
formatted_column = "#{family}:#{qualifier}"

if block_given?
yield(column, value)
yield(formatted_column, value)
else
res[column] = value
res[formatted_column] = value
end
end
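
To illustrate why the patch keeps two strings per cell (a raw, byte-for-byte column used as the lookup key, and a formatted column used only for display), here is a minimal plain-Ruby sketch. No HBase classes are used, and the escape rule is a stand-in assumption for the shell's default FORMATTER:

converters = { "x:\x11" => "toInt" }   # hypothetical entry registered from a COLUMNS spec like "x:\x11:toInt"

raw_column = "x:\x11"                  # built from the cell's raw family/qualifier bytes
formatted  = raw_column.gsub(/[^[:print:]]/) { |ch| format('\x%02X', ch.ord) }

p converters.key?(raw_column)   # => true  (the converter lookup still succeeds)
p converters.key?(formatted)    # => false (the escaped form would miss the entry)
puts formatted                  # => x:\x11 (the printable form the shell prints)

Before this change the escaped form served both purposes, so a converter registered for a non-printable column name was looked up under a different key and never applied.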

@@ -604,19 +609,24 @@ def _scan_internal(args = {}, scan = nil)
is_stale |= row.isStale

row.listCells.each do |c|
family = convert_bytes_with_position(c.getFamilyArray,
c.getFamilyOffset, c.getFamilyLength, converter_class, converter)
qualifier = convert_bytes_with_position(c.getQualifierArray,
c.getQualifierOffset, c.getQualifierLength, converter_class, converter)
# Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
# column is constructed in this consistent way so that it can be used as a key.
family_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getFamilyArray, c.getFamilyOffset, c.getFamilyLength)
qualifier_bytes = org.apache.hadoop.hbase.util.Bytes.copy(c.getQualifierArray, c.getQualifierOffset, c.getQualifierLength)
column = "#{family_bytes}:#{qualifier_bytes}"

column = "#{family}:#{qualifier}"
cell = to_string(column, c, maxlength, converter_class, converter)

# Use the FORMATTER to determine how column is printed
family = convert_bytes(family_bytes, converter_class, converter)
qualifier = convert_bytes(qualifier_bytes, converter_class, converter)
formatted_column = "#{family}:#{qualifier}"

if block_given?
yield(key, "column=#{column}, #{cell}")
yield(key, "column=#{formatted_column}, #{cell}")
else
res[key] ||= {}
res[key][column] = cell
res[key][formatted_column] = cell
end
end
# One more row processed
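
With this change, a converter attached to a non-printable column name takes effect in scan output. A hypothetical shell session for illustration (table name, row key, timestamp, and value are invented; the COLUMNS syntax matches the tests below):

hbase> put 't1', 'r1', "x:\x11", "\x00\x00\x02\xC8"
hbase> scan 't1', { COLUMNS => ["x:\x11:toInt"] }
ROW                        COLUMN+CELL
 r1                        column=x:\x11, timestamp=..., value=712

Previously the toInt converter was never found for this column, so the value printed as escaped bytes (\x00\x00\x02\xC8).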
@@ -729,11 +739,15 @@ def is_meta_table?
org.apache.hadoop.hbase.TableName::META_TABLE_NAME.equals(@table.getName)
end

# Returns family and (when has it) qualifier for a column name
# Given a column specification in the format FAMILY[:QUALIFIER[:CONVERTER]]
# 1. Save the converter for the given column
# 2. Return a 2-element Array with [family, qualifier or nil], discarding the converter if provided
#
# @param [String] column specification
def parse_column_name(column)
split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
set_converter(split) if split.length > 1
[split[0], split.length > 1 ? split[1] : nil]
spec = parse_column_format_spec(column)
set_column_converter(spec.family, spec.qualifier, spec.converter) unless spec.converter.nil?
[spec.family, spec.qualifier]
end

def toISO8601(millis)
@@ -806,9 +820,46 @@ def convert_bytes_with_position(bytes, offset, len, converter_class, converter_m
eval(converter_class).method(converter_method).call(bytes, offset, len)
end

# store the information designating what part of a column should be printed, and how
ColumnFormatSpec = Struct.new(:family, :qualifier, :converter)

##
# Parse the column specification for formatting used by shell commands like :scan
#
# Strings should be structured as follows:
# FAMILY:QUALIFIER[:CONVERTER]
# Where:
# - FAMILY is the column family
# - QUALIFIER is the column qualifier. Non-printable characters should be left AS-IS and should NOT BE escaped.
# - CONVERTER is optional and is the name of a converter (like toLong) to apply
#
# @param [String] column
# @return [ColumnFormatSpec] family, qualifier, and converter as Java bytes
private def parse_column_format_spec(column)
split = org.apache.hadoop.hbase.CellUtil.parseColumn(column.to_java_bytes)
family = split[0]
qualifier = nil
converter = nil
if split.length > 1
parts = org.apache.hadoop.hbase.CellUtil.parseColumn(split[1])
qualifier = parts[0]
if parts.length > 1
converter = parts[1]
end
end

ColumnFormatSpec.new(family, qualifier, converter)
end

private def set_column_converter(family, qualifier, converter)
@converters["#{String.from_java_bytes(family)}:#{String.from_java_bytes(qualifier)}"] = String.from_java_bytes(converter)
end
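
As a rough, standalone illustration of the FAMILY[:QUALIFIER[:CONVERTER]] split and of the key under which a converter is registered, here is a plain-Ruby approximation. String#split stands in for CellUtil.parseColumn (which the real code uses), and all names are hypothetical:

ColumnSpecSketch = Struct.new(:family, :qualifier, :converter)

def parse_spec_sketch(column)
  family, rest = column.split(':', 2)
  qualifier, converter = rest.nil? ? [nil, nil] : rest.split(':', 2)
  ColumnSpecSketch.new(family, qualifier, converter)
end

parse_spec_sketch("x:\x11:toInt")  # family "x", qualifier the raw 0x11 byte (unescaped), converter "toInt"
parse_spec_sketch('x:a')           # family "x", qualifier "a", converter nil
parse_spec_sketch('x')             # family "x", qualifier nil, converter nil

When a converter is present, set_column_converter above stores it under the raw, unescaped name, e.g. @converters["x:\x11"] = "toInt", which lines up with the column key the scan/get paths build from the cell's raw bytes.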

# If the column spec contains CONVERTER information, strip the :CONVERTER info from the column pair:
# 1. return the normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" becomes "cf" and "qualifier" only
# 2. register the CONVERTER information under the column spec - "cf:qualifier"
#
# Deprecated for removal in 4.0.0
def set_converter(column)
family = String.from_java_bytes(column[0])
parts = org.apache.hadoop.hbase.CellUtil.parseColumn(column[1])
@@ -817,6 +868,8 @@ def set_converter(column)
column[1] = parts[0]
end
end
extend Gem::Deprecate
deprecate :set_converter, "4.0.0", nil, nil
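
The extend Gem::Deprecate / deprecate pair above marks set_converter for removal without changing its behavior. For reference, a self-contained sketch of the same pattern with a hypothetical class and method (the documented four-argument form is method name, replacement or :none, year, month):

require 'rubygems'  # provides Gem::Deprecate

class LegacySketch
  def old_parse(column)
    column.split(':', 2)
  end

  extend Gem::Deprecate
  deprecate :old_parse, :none, 2025, 1
end

LegacySketch.new.old_parse('cf:q')  # still returns ["cf", "q"], but emits a deprecation NOTE on stderr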

#----------------------------------------------------------------------------------------------
# Get the split points for the table
28 changes: 22 additions & 6 deletions hbase-shell/src/test/ruby/hbase/table_test.rb
@@ -239,6 +239,7 @@ def setup
@test_ts = 12345678
@test_table.put(1, "x:a", 1)
@test_table.put(1, "x:b", 2, @test_ts)
@test_table.put(1, "x:\x11", [921].pack("N"))

@test_table.put(2, "x:a", 11)
@test_table.put(2, "x:b", 12, @test_ts)
@@ -333,9 +334,10 @@ def teardown
end

define_test "get should work with hash columns spec and an array of strings COLUMN parameter" do
res = @test_table._get_internal('1', COLUMN => [ 'x:a', 'x:b' ])
res = @test_table._get_internal('1', COLUMN => [ "x:\x11", 'x:a', 'x:b' ])
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_not_nil(res['x:\x11'])
assert_not_nil(res['x:a'])
assert_not_nil(res['x:b'])
end
@@ -356,6 +358,18 @@ def teardown
assert_not_nil(res['x:b'])
end

define_test "get should work with non-printable columns and values" do
res = @test_table._get_internal('1', COLUMNS => [ "x:\x11" ])
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_match(/value=\\x00\\x00\\x03\\x99/, res[ 'x:\x11' ])

res = @test_table._get_internal('1', COLUMNS => [ "x:\x11:toInt" ])
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_match(/value=921/, res[ 'x:\x11' ])
end
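
The hash keys asserted here ('x:\x11' in single quotes, i.e. the literal four characters backslash, x, 1, 1 after the colon) come from the default FORMATTER escaping the 0x11 byte in the qualifier. A small plain-Ruby sketch of that escaping rule, written as an assumption modelled on Bytes.toStringBinary (printable ASCII kept, everything else rendered as \xNN):

raw = "x:\x11"   # the qualifier is put with a real 0x11 byte
escaped = raw.each_byte.map { |b| (0x20..0x7E).cover?(b) ? b.chr : format('\x%02X', b) }.join

puts escaped               # x:\x11  (now a printable 6-character string)
puts escaped == 'x:\x11'   # true, matching the keys used in the assertions above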

define_test "get should work with hash columns spec and TIMESTAMP only" do
res = @test_table._get_internal('1', TIMESTAMP => @test_ts)
assert_not_nil(res)
@@ -412,10 +426,10 @@ def teardown
assert_not_nil(res['x:b'])
end

define_test "get with a block should yield (column, value) pairs" do
define_test "get with a block should yield (formatted column, value) pairs" do
res = {}
@test_table._get_internal('1') { |col, val| res[col] = val }
assert_equal(res.keys.sort, [ 'x:a', 'x:b' ])
assert_equal([ 'x:\x11', 'x:a', 'x:b' ], res.keys.sort)
end

define_test "get should support COLUMNS with value CONVERTER information" do
@@ -709,12 +723,14 @@ def teardown
define_test "scan should support COLUMNS with value CONVERTER information" do
@test_table.put(1, "x:c", [1024].pack('N'))
@test_table.put(1, "x:d", [98].pack('N'))
@test_table.put(1, "x:\x11", [712].pack('N'))
begin
res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']
res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt', "x:\x11:toInt"]
assert_not_nil(res)
assert_kind_of(Hash, res)
assert_not_nil(/value=1024/.match(res['1']['x:c']))
assert_not_nil(/value=98/.match(res['1']['x:d']))
assert_match(/value=1024/, res['1']['x:c'])
assert_match(/value=98/, res['1']['x:d'])
assert_match(/value=712/, res['1']['x:\x11'])
ensure
# clean up newly added columns for this test only.
@test_table.deleteall(1, 'x:c')