Skip to content

Commit

Permalink
[rac] Integrate SplitString into libaddressinput.
Browse files Browse the repository at this point in the history
This patch removes unnecessary functions and dependencies from SplitString
and includes it in the build of libaddressinput.

BUG=327046

Review URL: https://codereview.chromium.org/113493002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@240735 0039d316-1c4b-4281-b951-d872f2087c98
  • Loading branch information
rouslan@chromium.org committed Dec 13, 2013
1 parent 8487972 commit 32c90a9
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 537 deletions.
2 changes: 2 additions & 0 deletions third_party/libaddressinput/chromium/cpp/libaddressinput.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
'src/rule_retriever.cc',
'src/util/json.cc',
'src/util/md5.cc',
'src/util/string_split.cc',
'src/validating_storage.cc',
'src/validating_util.cc',
],
Expand Down Expand Up @@ -74,6 +75,7 @@
'test/util/json_test.cc',
'test/util/md5_unittest.cc',
'test/util/scoped_ptr_unittest.cc',
'test/util/string_split_unittest.cc',
'test/validating_storage_test.cc',
'test/validating_util_test.cc',
],
Expand Down
216 changes: 17 additions & 199 deletions third_party/libaddressinput/chromium/cpp/src/util/string_split.cc
Original file line number Diff line number Diff line change
@@ -1,219 +1,37 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// The original source code is from:
// http://src.chromium.org/viewvc/chrome/trunk/src/base/strings/string_split.cc?revision=216633

#include "base/strings/string_split.h"
#include "string_split.h"

#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

namespace base {
namespace i18n {
namespace addressinput {

// NOTE(review): this span comes from a rendered diff, so the previous
// templated header (SplitStringT, with a |trim_whitespace| flag) and the new
// std::string-only header (SplitString) appear back to back, and a few body
// lines appear twice (the |tmp| declaration and the final |if|).
//
// Splits |str| at every occurrence of the delimiter |s| and stores the pieces
// in |*r|, which is cleared first. Leading, trailing and adjacent delimiters
// produce empty pieces; an empty |str| produces an empty vector rather than a
// vector holding one empty string.
template<typename STR>
static void SplitStringT(const STR& str,
const typename STR::value_type s,
bool trim_whitespace,
std::vector<STR>* r) {
void SplitString(const std::string& str, char s, std::vector<std::string>* r) {
assert(r != NULL);
r->clear();
// |last| is the index at which the piece currently being scanned starts.
size_t last = 0;
size_t c = str.size();
// |i| deliberately runs one past the last character so that the final piece
// is flushed when i == c.
for (size_t i = 0; i <= c; ++i) {
if (i == c || str[i] == s) {
STR tmp(str, last, i - last);
if (trim_whitespace)
TrimWhitespace(tmp, TRIM_ALL, &tmp);
std::string tmp(str, last, i - last);
// Avoid converting an empty or all-whitespace source string into a vector
// of one empty string.
// The piece is dropped only when it is the final one (i == c), it is empty,
// and nothing has been stored yet.
if (i != c || !r->empty() || !tmp.empty())
if (i != c || !r->empty() || !tmp.empty()) {
r->push_back(tmp);
}
// The next piece starts right after the delimiter.
last = i + 1;
}
}
}

// string16 overload: splits |str| on |c| into |*r| by forwarding to
// SplitStringT with trim_whitespace = true.
void SplitString(const string16& str,
char16 c,
std::vector<string16>* r) {
// Sanity check on the delimiter; presumably rejects surrogate code units so
// that |c| is a complete character -- TODO confirm against CBU16_IS_SINGLE.
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, true, r);
}

// std::string overload: splits |str| on |c| into |*r| by forwarding to
// SplitStringT with trim_whitespace = true.
void SplitString(const std::string& str,
char c,
std::vector<std::string>* r) {
// The lower-bound check is only compiled where plain char is signed, i.e.
// where |c| could be negative.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// NOTE(review): the comparison is strict, so 0x7F itself is rejected too.
DCHECK(c < 0x7F);
SplitStringT(str, c, true, r);
}

// Parses |line| as "<key><delimiter(s)><value>".
//
// |key| receives everything before the first |key_value_delimiter|. The run of
// delimiters that follows is skipped and the remainder of the line is appended
// to |values| as a single element. Both outputs are cleared on entry.
//
// Returns false when |line| contains no delimiter (both outputs stay empty) or
// when nothing but delimiters follows the key (|key| is set, |values| stays
// empty). Returns true otherwise.
bool SplitStringIntoKeyValues(
    const std::string& line,
    char key_value_delimiter,
    std::string* key, std::vector<std::string>* values) {
  key->clear();
  values->clear();

  // The key ends at the first delimiter.
  const std::string::size_type delimiter_pos = line.find(key_value_delimiter);
  if (delimiter_pos == std::string::npos) {
    DVLOG(1) << "cannot parse key from line: " << line;
    return false;  // no key
  }
  *key = line.substr(0, delimiter_pos);

  // The value starts at the first non-delimiter character after the key.
  const std::string::size_type value_pos =
      line.find_first_not_of(key_value_delimiter, delimiter_pos);
  if (value_pos == std::string::npos) {
    DVLOG(1) << "cannot parse value from line: " << line;
    return false;  // no value
  }

  // Everything up to the end of the line is one value.
  values->push_back(line.substr(value_pos));
  return true;
}

// Breaks |line| into pairs at |key_value_pair_delimiter| and parses each
// non-empty pair with SplitStringIntoKeyValues using |key_value_delimiter|.
//
// |key_value_pairs| is cleared first and then receives one (key, value) entry
// per non-empty pair, in order. A pair that fails to parse is still recorded
// (with whatever key was extracted and an empty value when none was found).
//
// Returns true only if every non-empty pair parsed successfully.
bool SplitStringIntoKeyValuePairs(const std::string& line,
                                  char key_value_delimiter,
                                  char key_value_pair_delimiter,
                                  StringPairs* key_value_pairs) {
  key_value_pairs->clear();

  std::vector<std::string> tokens;
  SplitString(line, key_value_pair_delimiter, &tokens);

  bool all_parsed = true;
  for (std::vector<std::string>::const_iterator token = tokens.begin();
       token != tokens.end(); ++token) {
    // SplitStringIntoKeyValues would reject an empty pair, so skip it instead
    // of counting it as a failure.
    if (token->empty())
      continue;

    std::string parsed_key;
    std::vector<std::string> parsed_values;
    const bool parsed = SplitStringIntoKeyValues(
        *token, key_value_delimiter, &parsed_key, &parsed_values);
    // Keep going after a failure so that keys without values are still
    // reported; only remember that the split was not clean.
    if (!parsed)
      all_parsed = false;
    DCHECK_LE(parsed_values.size(), 1U);

    std::string parsed_value;
    if (!parsed_values.empty())
      parsed_value = parsed_values[0];
    key_value_pairs->push_back(make_pair(parsed_key, parsed_value));
  }
  return all_parsed;
}

// Splits |str| at every occurrence of the substring |s| and stores the pieces
// in |*r|, each one passed through TrimWhitespace(..., TRIM_ALL, ...). |*r| is
// cleared first. Leading, trailing and adjacent delimiters produce empty
// pieces, so the result always holds at least one element.
//
// An empty |s| is treated as "no delimiter": the whole of |str| becomes the
// single piece. Without that guard find() would keep matching the empty
// delimiter at |begin_index| without ever advancing, and the loop would append
// pieces forever.
template <typename STR>
static void SplitStringUsingSubstrT(const STR& str,
                                    const STR& s,
                                    std::vector<STR>* r) {
  r->clear();
  typename STR::size_type begin_index = 0;
  while (true) {
    const typename STR::size_type end_index =
        s.empty() ? STR::npos : str.find(s, begin_index);
    // No further delimiter: the rest of |str| is the final piece.
    const bool is_last = (end_index == STR::npos);
    const STR term = is_last
        ? str.substr(begin_index)
        : str.substr(begin_index, end_index - begin_index);
    STR tmp;
    TrimWhitespace(term, TRIM_ALL, &tmp);
    r->push_back(tmp);
    if (is_last)
      return;
    // Resume scanning just past the delimiter that was found.
    begin_index = end_index + s.size();
  }
}

// string16 overload: splits |str| at each occurrence of the substring |s|
// into |*r| by forwarding to SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// std::string overload: splits |str| at each occurrence of the substring |s|
// into |*r| by forwarding to SplitStringUsingSubstrT.
void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r) {
SplitStringUsingSubstrT(str, s, r);
}

// string16 overload: splits |str| on |c| into |*r| without trimming the
// pieces, by forwarding to SplitStringT with trim_whitespace = false.
void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r) {
// Sanity check on the delimiter; presumably rejects surrogate code units so
// that |c| is a complete character -- TODO confirm against CBU16_IS_SINGLE.
DCHECK(CBU16_IS_SINGLE(c));
SplitStringT(str, c, false, r);
}

// std::string overload: splits |str| on |c| into |*r| without trimming the
// pieces, by forwarding to SplitStringT with trim_whitespace = false.
void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r) {
// The input is checked with IsStringUTF8 before it is split.
DCHECK(IsStringUTF8(str));
// The lower-bound check is only compiled where plain char is signed, i.e.
// where |c| could be negative.
#if CHAR_MIN < 0
DCHECK(c >= 0);
#endif
// NOTE(review): the comparison is strict, so 0x7F itself is rejected too.
DCHECK(c < 0x7F);
SplitStringT(str, c, false, r);
}

// Breaks |str| into its maximal runs of non-whitespace characters and stores
// them, in order, in |*result| (cleared first). The characters treated as
// whitespace are space, tab, LF, line tab, FF and CR. No empty token is ever
// produced, so an empty or all-whitespace |str| yields an empty vector.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  result->clear();

  // Sentinel meaning "not currently inside a token".
  const size_t kNoToken = static_cast<size_t>(-1);
  const size_t length = str.length();

  size_t token_start = kNoToken;
  for (size_t i = 0; i < length; ++i) {
    const typename STR::value_type ch = str[i];
    const bool is_space = ch == L' ' || ch == L'\t' || ch == L'\xA' ||
                          ch == L'\xB' || ch == L'\xC' || ch == L'\xD';
    if (!is_space) {
      // First character of a new token.
      if (token_start == kNoToken)
        token_start = i;
      continue;
    }
    // Whitespace closes the token in progress, if there is one.
    if (token_start != kNoToken) {
      result->push_back(str.substr(token_start, i - token_start));
      token_start = kNoToken;
    }
  }

  // Flush a token that runs up to the end of the string.
  if (token_start != kNoToken)
    result->push_back(str.substr(token_start));
}

// string16 overload: splits |str| into its runs of non-whitespace characters,
// stored in |*result|, by forwarding to SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result) {
SplitStringAlongWhitespaceT(str, result);
}

// std::string overload: splits |str| into its runs of non-whitespace
// characters, stored in |*result|, by forwarding to
// SplitStringAlongWhitespaceT.
void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result) {
SplitStringAlongWhitespaceT(str, result);
}

} // namespace base
} // namespace addressinput
} // namespace i18n
79 changes: 15 additions & 64 deletions third_party/libaddressinput/chromium/cpp/src/util/string_split.h
Original file line number Diff line number Diff line change
@@ -1,83 +1,34 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// The original source code is from:
// http://src.chromium.org/viewvc/chrome/trunk/src/base/strings/string_split.h?revision=236210
//
// Modifications from original:
// 1) Supports only std::string type.
// 2) Does not trim whitespace.

#ifndef BASE_STRINGS_STRING_SPLIT_H_
#define BASE_STRINGS_STRING_SPLIT_H_
#ifndef I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_
#define I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_

#include <string>
#include <utility>
#include <vector>

#include "base/base_export.h"
#include "base/strings/string16.h"

namespace base {
namespace i18n {
namespace addressinput {

// Splits |str| into a vector of strings delimited by |c|, placing the results
// in |r|. If several instances of |c| are contiguous, or if |str| begins with
// or ends with |c|, then an empty string is inserted.
//
// Every substring is trimmed of any leading or trailing white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BASE_EXPORT void SplitString(const string16& str,
char16 c,
std::vector<string16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
BASE_EXPORT void SplitString(const std::string& str,
char c,
std::vector<std::string>* r);

BASE_EXPORT bool SplitStringIntoKeyValues(const std::string& line,
char key_value_delimiter,
std::string* key,
std::vector<std::string>* values);

// Ordered list of (key, value) string pairs, as filled in by
// SplitStringIntoKeyValuePairs.
typedef std::vector<std::pair<std::string, std::string> > StringPairs;

BASE_EXPORT bool SplitStringIntoKeyValuePairs(
const std::string& line,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);

// The same as SplitString, but use a substring delimiter instead of a char.
BASE_EXPORT void SplitStringUsingSubstr(const string16& str,
const string16& s,
std::vector<string16>* r);
BASE_EXPORT void SplitStringUsingSubstr(const std::string& str,
const std::string& s,
std::vector<std::string>* r);

// The same as SplitString, but don't trim white space.
// NOTE: |c| must be in BMP (Basic Multilingual Plane)
BASE_EXPORT void SplitStringDontTrim(const string16& str,
char16 c,
std::vector<string16>* r);
// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
// the trailing byte of a multi-byte character can be in the ASCII range.
// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
// Note: |c| must be in the ASCII range.
BASE_EXPORT void SplitStringDontTrim(const std::string& str,
char c,
std::vector<std::string>* r);

// WARNING: this uses whitespace as defined by the HTML5 spec. If you need
// a function similar to this but want to trim all types of whitespace, then
// factor this out into a function that takes a string containing the characters
// that are treated as whitespace.
//
// Splits the string along whitespace (where whitespace is the five space
// characters defined by HTML 5 plus line tab, i.e. space, tab, LF, line tab,
// FF and CR). Each contiguous block of non-whitespace characters is added to
// result.
BASE_EXPORT void SplitStringAlongWhitespace(const string16& str,
std::vector<string16>* result);
BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str,
std::vector<std::string>* result);
void SplitString(const std::string& str, char c, std::vector<std::string>* r);

} // namespace base
} // namespace addressinput
} // namespace i18n

#endif // BASE_STRINGS_STRING_SPLIT_H_
#endif // I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_
Loading

0 comments on commit 32c90a9

Please sign in to comment.