forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move MatchPattern to its own header and the base namespace.
BUG= Review URL: https://codereview.chromium.org/1226673003 Cr-Commit-Position: refs/heads/master@{#337488}
- Loading branch information
brettw
authored and
Commit bot
committed
Jul 6, 2015
1 parent
7d47f7a
commit d97eede
Showing
48 changed files
with
482 additions
and
396 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
// Copyright 2015 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
#include "base/strings/pattern.h" | ||
|
||
#include "base/third_party/icu/icu_utf.h" | ||
|
||
namespace base { | ||
|
||
namespace { | ||
|
||
// Returns true if |character| is one of the two pattern wildcards, '*' or '?'.
static bool IsWildcard(base_icu::UChar32 character) {
  switch (character) {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
|
||
// Move the strings pointers to the point where they start to differ. | ||
template <typename CHAR, typename NEXT> | ||
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, | ||
const CHAR** string, const CHAR* string_end, | ||
NEXT next) { | ||
const CHAR* escape = NULL; | ||
while (*pattern != pattern_end && *string != string_end) { | ||
if (!escape && IsWildcard(**pattern)) { | ||
// We don't want to match wildcard here, except if it's escaped. | ||
return; | ||
} | ||
|
||
// Check if the escapement char is found. If so, skip it and move to the | ||
// next character. | ||
if (!escape && **pattern == '\\') { | ||
escape = *pattern; | ||
next(pattern, pattern_end); | ||
continue; | ||
} | ||
|
||
// Check if the chars match, if so, increment the ptrs. | ||
const CHAR* pattern_next = *pattern; | ||
const CHAR* string_next = *string; | ||
base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | ||
if (pattern_char == next(&string_next, string_end) && | ||
pattern_char != CBU_SENTINEL) { | ||
*pattern = pattern_next; | ||
*string = string_next; | ||
} else { | ||
// Uh oh, it did not match, we are done. If the last char was an | ||
// escapement, that means that it was an error to advance the ptr here, | ||
// let's put it back where it was. This also mean that the MatchPattern | ||
// function will return false because if we can't match an escape char | ||
// here, then no one will. | ||
if (escape) { | ||
*pattern = escape; | ||
} | ||
return; | ||
} | ||
|
||
escape = NULL; | ||
} | ||
} | ||
|
||
// Advances |*pattern| past a run of consecutive wildcard characters ('*' or
// '?'), stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
|
||
// Recursive matcher shared by both MatchPattern() overloads.
//
// [eval, eval_end) is the text being tested and [pattern, pattern_end) is the
// wildcard pattern. |next| decodes one character at the given pointer and
// advances that pointer past it. |depth| is the current recursion level:
// callers pass 0, every wildcard expansion recurses with depth + 1, and the
// function gives up (returns false) once |depth| exceeds kMaxDepth.
//
// Returns true if the whole of the text is matched by the whole pattern.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contain
  // only wildcards (each of which can match nothing).
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // |pattern| now points at a wildcard or at a character that failed to
  // match. |next_pattern| is the position just after that character.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // A question mark matches zero or one character: compare the rest of the
  // pattern with the current string, then with the string minus one
  // character.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    return MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next);
  }

  // This is a *, try to match all the possible suffixes of the string with
  // the remainder of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Nothing but wildcards is left in the pattern, so the '*' swallows
    // whatever remains of the string.
    if (next_pattern == pattern_end)
      return true;

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // Step over one whole character, not one code unit, so that a retry
      // never starts in the middle of a multi-unit sequence (a UTF-8
      // continuation byte or the trail half of a UTF-16 surrogate pair).
      next(&eval, eval_end);
    }

    // The string ran out while the pattern still has a non-wildcard
    // character left to match.
    return false;
  }

  // A literal mismatch.
  return false;
}
|
||
struct NextCharUTF8 { | ||
base_icu::UChar32 operator()(const char** p, const char* end) { | ||
base_icu::UChar32 c; | ||
int offset = 0; | ||
CBU8_NEXT(*p, offset, end - *p, c); | ||
*p += offset; | ||
return c; | ||
} | ||
}; | ||
|
||
struct NextCharUTF16 { | ||
base_icu::UChar32 operator()(const char16** p, const char16* end) { | ||
base_icu::UChar32 c; | ||
int offset = 0; | ||
CBU16_NEXT(*p, offset, end - *p, c); | ||
*p += offset; | ||
return c; | ||
} | ||
}; | ||
|
||
} // namespace | ||
|
||
bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { | ||
return MatchPatternT(eval.data(), eval.data() + eval.size(), | ||
pattern.data(), pattern.data() + pattern.size(), | ||
0, NextCharUTF8()); | ||
} | ||
|
||
bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { | ||
return MatchPatternT(eval.data(), eval.data() + eval.size(), | ||
pattern.data(), pattern.data() + pattern.size(), | ||
0, NextCharUTF16()); | ||
} | ||
|
||
} // namespace base |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright 2015 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
#ifndef BASE_STRINGS_PATTERN_H_ | ||
#define BASE_STRINGS_PATTERN_H_ | ||
|
||
#include "base/base_export.h" | ||
#include "base/strings/string_piece.h" | ||
|
||
namespace base { | ||
|
||
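// Returns true if |string| matches |pattern|. The pattern may contain the
// wildcards * and ?:
//   ? matches 0 or 1 character.
//   * matches 0 or more characters.
//
// The backslash character (\) escapes the character that follows it, so \*
// and \? match a literal * and ?.
//
// Matching is recursive and gives up (returning false) when more than 16
// nested wildcard expansions are needed, so patterns are effectively limited
// to 16 * or ? characters. A run of wildcards directly following a * is
// collapsed and counts once.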
BASE_EXPORT bool MatchPattern(const StringPiece& string, | ||
const StringPiece& pattern); | ||
BASE_EXPORT bool MatchPattern(const StringPiece16& string, | ||
const StringPiece16& pattern); | ||
|
||
} // namespace base | ||
|
||
#endif // BASE_STRINGS_PATTERN_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// Copyright 2015 The Chromium Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style license that can be | ||
// found in the LICENSE file. | ||
|
||
#include "base/strings/pattern.h" | ||
#include "base/strings/utf_string_conversions.h" | ||
#include "testing/gtest/include/gtest/gtest.h" | ||
|
||
namespace base { | ||
|
||
TEST(StringUtilTest, MatchPatternTest) {
  // One expectation per row: does |pattern| match |input|?
  struct Case {
    const char* input;
    const char* pattern;
    bool expected;
  };

  static const Case kUTF8Cases[] = {
      // Basic behavior of '*', '?' and the backslash escape.
      {"www.google.com", "*.com", true},
      {"www.google.com", "*", true},
      {"www.google.com", "www*.g*.org", false},
      {"Hello", "H?l?o", true},
      {"www.google.com", "http://*)", false},
      {"www.msn.com", "*.COM", false},
      {"Hello*1234", "He??o\\*1*", true},
      {"", "*.*", false},
      {"", "*", true},
      {"", "?", true},
      {"", "", true},
      {"Hello", "", false},
      {"Hello*", "Hello*", true},
      // Stop after a certain recursion depth.
      {"123456789012345678", "?????????????????*", false},

      // Test UTF8 matching.
      {"heart: \xe2\x99\xa0", "*\xe2\x99\xa0", true},
      {"heart: \xe2\x99\xa0.", "heart: ?.", true},
      {"hearts: \xe2\x99\xa0\xe2\x99\xa0", "*", true},
      // Invalid sequences should be handled as a single invalid character.
      {"invalid: \xef\xbf\xbe", "invalid: ?", true},
      // If the pattern has invalid characters, it shouldn't match anything.
      {"\xf4\x90\x80\x80", "\xf4\x90\x80\x80", false},
  };
  for (size_t i = 0; i < sizeof(kUTF8Cases) / sizeof(kUTF8Cases[0]); ++i) {
    const Case& test_case = kUTF8Cases[i];
    SCOPED_TRACE(test_case.pattern);
    EXPECT_EQ(test_case.expected,
              MatchPattern(test_case.input, test_case.pattern));
  }

  static const Case kUTF16Cases[] = {
      // Test UTF16 character matching.
      {"www.google.com", "*.com", true},
      {"Hello*1234", "He??o\\*1*", true},
      // This case verifies that consecutive wild cards are collapsed into 1
      // wildcard (when this doesn't occur, MatchPattern reaches its maximum
      // recursion depth).
      {"Hello", "He********************************o", true},
  };
  for (size_t i = 0; i < sizeof(kUTF16Cases) / sizeof(kUTF16Cases[0]); ++i) {
    const Case& test_case = kUTF16Cases[i];
    SCOPED_TRACE(test_case.pattern);
    EXPECT_EQ(test_case.expected,
              MatchPattern(UTF8ToUTF16(test_case.input),
                           UTF8ToUTF16(test_case.pattern)));
  }
}
|
||
} // namespace base |
Oops, something went wrong.