-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(decoders): basic escaped unicode
- Loading branch information
Showing
5 changed files
with
1,166 additions
and
1,014 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ func DefaultDecoders() []Decoder { | |
&UTF8{}, | ||
&Base64{}, | ||
&UTF16{}, | ||
&EscapedUnicode{}, | ||
} | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package decoders | ||
|
||
import ( | ||
"regexp" | ||
"strconv" | ||
"unicode/utf8" | ||
|
||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" | ||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources" | ||
) | ||
|
||
type EscapedUnicode struct{} | ||
|
||
var _ Decoder = (*EscapedUnicode)(nil) | ||
|
||
// It might be advantageous to limit these to a subset of acceptable characters, similar to base64. | ||
// https://dencode.com/en/string/unicode-escape | ||
var escapePat = regexp.MustCompile(`(?i:\\{1,2}u)([a-fA-F0-9]{4})`) | ||
|
||
func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk { | ||
if chunk == nil || len(chunk.Data) == 0 || !escapePat.Match(chunk.Data) { | ||
return nil | ||
} | ||
|
||
decoded := decodeUnicode(chunk.Data) | ||
if decoded == nil { | ||
return nil | ||
} | ||
|
||
chunk.Data = decoded | ||
decodableChunk := &DecodableChunk{ | ||
DecoderType: detectorspb.DecoderType_ESCAPED_UNICODE, | ||
Chunk: chunk, | ||
} | ||
|
||
return decodableChunk | ||
} | ||
|
||
func decodeUnicode(input []byte) []byte { | ||
// Find all Unicode escape sequences in the input byte slice | ||
indices := escapePat.FindAllSubmatchIndex(input, -1) | ||
if len(indices) == 0 { | ||
return nil | ||
} | ||
|
||
// Iterate over found indices in reverse order to avoid modifying the slice length | ||
//for i, matches := range indices { | ||
for i := len(indices) - 1; i >= 0; i-- { | ||
matches := indices[i] | ||
startIndex := matches[0] | ||
hexStartIndex := matches[2] | ||
endIndex := matches[3] | ||
|
||
// Extract the hexadecimal value from the escape sequence | ||
hexValue := string(input[hexStartIndex:endIndex]) | ||
|
||
// Parse the hexadecimal value to an integer | ||
unicodeInt, err := strconv.ParseInt(hexValue, 16, 32) | ||
if err != nil { | ||
// If there's an error, continue to the next escape sequence | ||
continue | ||
} | ||
|
||
// Convert the Unicode code point to a UTF-8 representation | ||
utf8Bytes := make([]byte, 4) | ||
utf8Len := utf8.EncodeRune(utf8Bytes, rune(unicodeInt)) | ||
|
||
// Replace the escape sequence with the UTF-8 representation | ||
input = append(input[:startIndex], append(utf8Bytes[:utf8Len], input[endIndex:]...)...) | ||
} | ||
|
||
return input | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package decoders | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/kylelemons/godebug/pretty" | ||
|
||
"github.com/trufflesecurity/trufflehog/v3/pkg/sources" | ||
) | ||
|
||
func TestUnicodeEscape_FromChunk(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
chunk *sources.Chunk | ||
want *sources.Chunk | ||
wantErr bool | ||
}{ | ||
{ | ||
name: "all escaped", | ||
chunk: &sources.Chunk{ | ||
Data: []byte("\\u0074\\u006f\\u006b\\u0065\\u006e\\u003a\\u0020\\u0022\\u0067\\u0068\\u0070\\u005f\\u0049\\u0077\\u0064\\u004d\\u0078\\u0039\\u0057\\u0046\\u0057\\u0052\\u0052\\u0066\\u004d\\u0068\\u0054\\u0059\\u0069\\u0061\\u0056\\u006a\\u005a\\u0037\\u0038\\u004a\\u0066\\u0075\\u0061\\u006d\\u0076\\u006e\\u0030\\u0059\\u0057\\u0052\\u004d\\u0030\\u0022"), | ||
}, | ||
want: &sources.Chunk{ | ||
Data: []byte("token: \"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\""), | ||
}, | ||
}, | ||
{ | ||
name: "mixed content", | ||
chunk: &sources.Chunk{ | ||
Data: []byte("npm config set @trufflesec:registry=https://npm.pkg.github.com\nnpm config set //npm.pkg.github.com:_authToken=$'\\u0067hp_9ovSHEBCq0drG42yjoam76iNybtqLN25CgSf'"), | ||
}, | ||
want: &sources.Chunk{ | ||
Data: []byte("npm config set @trufflesec:registry=https://npm.pkg.github.com\nnpm config set //npm.pkg.github.com:_authToken=$'ghp_9ovSHEBCq0drG42yjoam76iNybtqLN25CgSf'"), | ||
}, | ||
}, | ||
{ | ||
name: "multiple slashes", | ||
chunk: &sources.Chunk{ | ||
Data: []byte(`SameValue("hello","\\u0068el\\u006co"); // true`), | ||
}, | ||
want: &sources.Chunk{ | ||
Data: []byte(`SameValue("hello","hello"); // true`), | ||
}, | ||
}, | ||
{ | ||
name: "no escaped", | ||
chunk: &sources.Chunk{ | ||
Data: []byte(`-//npm.fontawesome.com/:_authToken=12345678-2323-1111-1111-12345670B312 | ||
+//npm.fontawesome.com/:_authToken=REMOVED_TOKEN`), | ||
}, | ||
want: nil, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
d := &EscapedUnicode{} | ||
got := d.FromChunk(tt.chunk) | ||
if tt.want != nil { | ||
if got == nil { | ||
t.Fatal("got nil, did not want nil") | ||
} | ||
if diff := pretty.Compare(string(tt.want.Data), string(got.Data)); diff != "" { | ||
t.Errorf("UnicodeEscape.FromChunk() %s diff: (-want +got)\n%s", tt.name, diff) | ||
} | ||
} else { | ||
if got != nil { | ||
t.Error("Expected nil chunk") | ||
} | ||
} | ||
}) | ||
} | ||
} |
Oops, something went wrong.