@@ -449,18 +449,23 @@ def _get_internal(row, *args)
449
449
# Print out results. Result can be Cell or RowResult.
450
450
res = { }
451
451
result . listCells . each do |c |
452
- family = convert_bytes_with_position ( c . getFamilyArray ,
453
- c . getFamilyOffset , c . getFamilyLength , converter_class , converter )
454
- qualifier = convert_bytes_with_position ( c . getQualifierArray ,
455
- c . getQualifierOffset , c . getQualifierLength , converter_class , converter )
452
+ # Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
453
+ # column is constructed in this consistent way so that it can be used as a key.
454
+ family_bytes = org . apache . hadoop . hbase . util . Bytes . copy ( c . getFamilyArray , c . getFamilyOffset , c . getFamilyLength )
455
+ qualifier_bytes = org . apache . hadoop . hbase . util . Bytes . copy ( c . getQualifierArray , c . getQualifierOffset , c . getQualifierLength )
456
+ column = "#{ family_bytes } :#{ qualifier_bytes } "
456
457
457
- column = "#{ family } :#{ qualifier } "
458
458
value = to_string ( column , c , maxlength , converter_class , converter )
459
459
460
+ # Use the FORMATTER to determine how column is printed
461
+ family = convert_bytes ( family_bytes , converter_class , converter )
462
+ qualifier = convert_bytes ( qualifier_bytes , converter_class , converter )
463
+ formatted_column = "#{ family } :#{ qualifier } "
464
+
460
465
if block_given?
461
- yield ( column , value )
466
+ yield ( formatted_column , value )
462
467
else
463
- res [ column ] = value
468
+ res [ formatted_column ] = value
464
469
end
465
470
end
466
471
@@ -604,19 +609,24 @@ def _scan_internal(args = {}, scan = nil)
604
609
is_stale |= row . isStale
605
610
606
611
row . listCells . each do |c |
607
- family = convert_bytes_with_position ( c . getFamilyArray ,
608
- c . getFamilyOffset , c . getFamilyLength , converter_class , converter )
609
- qualifier = convert_bytes_with_position ( c . getQualifierArray ,
610
- c . getQualifierOffset , c . getQualifierLength , converter_class , converter )
612
+ # Get the family and qualifier of the cell without escaping non-printable characters. It is crucial that
613
+ # column is constructed in this consistent way so that it can be used as a key.
614
+ family_bytes = org . apache . hadoop . hbase . util . Bytes . copy ( c . getFamilyArray , c . getFamilyOffset , c . getFamilyLength )
615
+ qualifier_bytes = org . apache . hadoop . hbase . util . Bytes . copy ( c . getQualifierArray , c . getQualifierOffset , c . getQualifierLength )
616
+ column = "#{ family_bytes } :#{ qualifier_bytes } "
611
617
612
- column = "#{ family } :#{ qualifier } "
613
618
cell = to_string ( column , c , maxlength , converter_class , converter )
614
619
620
+ # Use the FORMATTER to determine how column is printed
621
+ family = convert_bytes ( family_bytes , converter_class , converter )
622
+ qualifier = convert_bytes ( qualifier_bytes , converter_class , converter )
623
+ formatted_column = "#{ family } :#{ qualifier } "
624
+
615
625
if block_given?
616
- yield ( key , "column=#{ column } , #{ cell } " )
626
+ yield ( key , "column=#{ formatted_column } , #{ cell } " )
617
627
else
618
628
res [ key ] ||= { }
619
- res [ key ] [ column ] = cell
629
+ res [ key ] [ formatted_column ] = cell
620
630
end
621
631
end
622
632
# One more row processed
@@ -729,11 +739,15 @@ def is_meta_table?
729
739
org . apache . hadoop . hbase . TableName ::META_TABLE_NAME . equals ( @table . getName )
730
740
end
731
741
732
- # Returns family and (when has it) qualifier for a column name
742
+ # Given a column specification in the format FAMILY[:QUALIFIER[:CONVERTER]]
743
+ # 1. Save the converter for the given column
744
+ # 2. Return a 2-element Array with [family, qualifier or nil], discarding the converter if provided
745
+ #
746
+ # @param [String] column specification
733
747
def parse_column_name ( column )
734
- split = org . apache . hadoop . hbase . CellUtil . parseColumn ( column . to_java_bytes )
735
- set_converter ( split ) if split . length > 1
736
- [ split [ 0 ] , split . length > 1 ? split [ 1 ] : nil ]
748
+ spec = parse_column_format_spec ( column )
749
+ set_column_converter ( spec . family , spec . qualifier , spec . converter ) unless spec . converter . nil?
750
+ [ spec . family , spec . qualifier ]
737
751
end
738
752
739
753
def toISO8601 ( millis )
@@ -806,9 +820,46 @@ def convert_bytes_with_position(bytes, offset, len, converter_class, converter_m
806
820
eval ( converter_class ) . method ( converter_method ) . call ( bytes , offset , len )
807
821
end
808
822
823
+ # store the information designating what part of a column should be printed, and how
824
+ ColumnFormatSpec = Struct . new ( :family , :qualifier , :converter )
825
+
826
+ ##
827
+ # Parse the column specification for formatting used by shell commands like :scan
828
+ #
829
+ # Strings should be structured as follows:
830
+ # FAMILY[:QUALIFIER[:CONVERTER]]
831
+ # Where:
832
+ # - FAMILY is the column family
833
+ # - QUALIFIER is the column qualifier. Non-printable characters should be left AS-IS and should NOT BE escaped.
834
+ # - CONVERTER is optional and is the name of a converter (like toLong) to apply
835
+ #
836
+ # @param [String] column
837
+ # @return [ColumnFormatSpec] family, qualifier, and converter as Java bytes
838
+ private def parse_column_format_spec ( column )
839
+ split = org . apache . hadoop . hbase . CellUtil . parseColumn ( column . to_java_bytes )
840
+ family = split [ 0 ]
841
+ qualifier = nil
842
+ converter = nil
843
+ if split . length > 1
844
+ parts = org . apache . hadoop . hbase . CellUtil . parseColumn ( split [ 1 ] )
845
+ qualifier = parts [ 0 ]
846
+ if parts . length > 1
847
+ converter = parts [ 1 ]
848
+ end
849
+ end
850
+
851
+ ColumnFormatSpec . new ( family , qualifier , converter )
852
+ end
853
+
854
+ private def set_column_converter ( family , qualifier , converter )
855
+ @converters [ "#{ String . from_java_bytes ( family ) } :#{ String . from_java_bytes ( qualifier ) } " ] = String . from_java_bytes ( converter )
856
+ end
857
+
809
858
# If the column spec contains CONVERTER information, strip the :CONVERTER info from the column pair:
810
859
# 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only
811
860
# 2. register the CONVERTER information based on column spec - "cf:qualifier"
861
+ #
862
+ # Deprecated for removal in 4.0.0
812
863
def set_converter ( column )
813
864
family = String . from_java_bytes ( column [ 0 ] )
814
865
parts = org . apache . hadoop . hbase . CellUtil . parseColumn ( column [ 1 ] )
@@ -817,6 +868,8 @@ def set_converter(column)
817
868
column [ 1 ] = parts [ 0 ]
818
869
end
819
870
end
871
+ extend Gem ::Deprecate
872
+ deprecate :set_converter , "4.0.0" , nil , nil
820
873
821
874
#----------------------------------------------------------------------------------------------
822
875
# Get the split points for the table
0 commit comments