Skip to content

Commit

Permalink
Initial repo setup for third party library CED
Browse files Browse the repository at this point in the history
Initializes src/third_party/ced for CED, open-source library used to
detect text encoding automatically.

This library is intended to replace ICU, which Blink uses for its
automatic text encoding detection feature. When enabled, the
feature helps web pages that arrive without an encoding label
display as expected instead of showing gibberish.

The feature is disabled by default despite its usefulness,
since it can slow down page loading for unlabeled pages
(about 2% of all pages) by as much as 20~30% in some cases.
It is left to users to turn it on via the UI.

CED is used extensively in Google, and is a better
alternative in terms of accuracy, speed, size, etc.
Experiments show that CED introduces no significant load.
With CED in place, it is possible that automatic encoding
detection can be turned on by default without introducing
regression in page loading time. This will be particularly
beneficial on mobile platforms like Android, where the UI has more
limitations. It will just work without individual users
having to turn it on.

BUG=597488

Review-Url: https://codereview.chromium.org/2047993002
Cr-Commit-Position: refs/heads/master@{#400906}
  • Loading branch information
JinsukKim authored and Commit bot committed Jun 21, 2016
1 parent e05337a commit 21d3369
Show file tree
Hide file tree
Showing 7 changed files with 199 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ vs-chromium-project.txt
/third_party/cacheinvalidation/cacheinvalidation_unittests_run.xml
/third_party/cardboard-java/src
/third_party/catapult
/third_party/ced/src
/third_party/chromeos_login_manager
/third_party/chromeos_text_input
/third_party/chromite
Expand Down
3 changes: 3 additions & 0 deletions DEPS
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ deps = {
'src/third_party/mesa/src':
Var('chromium_git') + '/chromium/deps/mesa.git' + '@' + 'ef811c6bd4de74e13e7035ca882cc77f85793fef',

'src/third_party/ced/src':
Var('chromium_git') + '/external/github.com/google/compact_enc_det.git' + '@' + '511859ce93b8b6a1d002b96c79aeb6b184dab44a',

'src/third_party/cld_2/src':
Var('chromium_git') + '/external/github.com/CLD2Owners/cld2.git' + '@' + '84b58a5d7690ebf05a91406f371ce00c3daf31c0',

Expand Down
76 changes: 76 additions & 0 deletions third_party/ced/BUILD.gn
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import("//testing/test.gni")

# Build settings exported by the CED library. The ":ced" target lists this
# config in its public_configs, so targets depending on ":ced" receive these
# settings as well.
config("ced_config") {
  if (is_clang) {
    # Turn off Clang's unused-function warning for code built with this config.
    cflags = [ "-Wno-unused-function" ]
  }

  # Makes paths under src/ (the upstream checkout) usable in #include lines.
  include_dirs = [ "src" ]
}

# The Compact Encoding Detection library, compiled from the upstream sources
# checked out under src/.
source_set("ced") {
  # Build as third-party code: drop the chromium_code compiler config and use
  # the no_chromium_code one instead.
  configs -= [ "//build/config/compiler:chromium_code" ]
  configs += [ "//build/config/compiler:no_chromium_code" ]

  # Export the include path and Clang flag from ced_config to dependents.
  public_configs = [ ":ced_config" ]

  sources = [
    "src/compact_enc_det/compact_enc_det.cc",
    "src/compact_enc_det/compact_enc_det.h",
    "src/compact_enc_det/compact_enc_det_generated_tables.h",
    "src/compact_enc_det/compact_enc_det_generated_tables2.h",
    "src/compact_enc_det/compact_enc_det_hint_code.cc",
    "src/compact_enc_det/compact_enc_det_hint_code.h",
    "src/util/basictypes.h",
    "src/util/build_config.h",
    "src/util/commandlineflags.h",
    "src/util/encodings/encodings.cc",
    "src/util/encodings/encodings.h",
    "src/util/encodings/encodings.pb.h",
    "src/util/languages/languages.cc",
    "src/util/languages/languages.h",
    "src/util/languages/languages.pb.h",
    "src/util/logging.h",
    "src/util/port.h",
    "src/util/string_util.h",
    "src/util/varsetter.h",
  ]

  # Tell the sources which compiler family is in use.
  # NOTE(review): presumably consumed by CED's portability headers - confirm.
  if (!is_win) {
    defines = [ "COMPILER_GCC" ]
  } else {
    defines = [ "COMPILER_MSVC" ]

    # MSVC warnings suppressed for the upstream sources.
    cflags = [
      "/wd4005",  # Macro defined twice.
      "/wd4006",  # #undef expected an identifier.
      "/wd4309",  # Truncation of constant value.
    ]
  }
}

# Test executable built from the upstream CED unit and fuzz test sources,
# linked against ":ced" and gtest.
test("ced_unittests") {
  deps = [
    ":ced",
    "//testing/gtest",
    "//testing/gtest:gtest_main",
  ]

  sources = [
    "src/compact_enc_det/compact_enc_det_fuzz_test.cc",
    "src/compact_enc_det/compact_enc_det_unittest.cc",
    "src/compact_enc_det/detail_head_string.inc",
  ]

  # The defines on ":ced" are not public, so the compiler-family macro is
  # repeated here for the test sources.
  if (!is_win) {
    defines = [ "COMPILER_GCC" ]
  } else {
    defines = [ "COMPILER_MSVC" ]

    # MSVC warnings suppressed for the upstream test sources.
    cflags = [
      "/wd4310",  # Cast truncates constant value.
      "/wd4267",  # size_t -> int
    ]
  }
}
27 changes: 27 additions & 0 deletions third_party/ced/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 2 additions & 0 deletions third_party/ced/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
jinsukkim@chromium.org
jshin@chromium.org
13 changes: 13 additions & 0 deletions third_party/ced/README.chromium
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Name: Compact Encoding Detection
Short Name: ced
URL: https://github.com/google/compact_enc_det
Version: 511859ce93b8b6a1d002b96c79aeb6b184dab44a
License: Apache 2.0
License File: LICENSE
Security Critical: no

Description:
Compact Encoding Detection(CED for short) is a library written in C++ that
scans given raw bytes and detects the most likely text encoding.

Local Modifications: None
77 changes: 77 additions & 0 deletions third_party/ced/ced.gyp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

{
  'includes': [
    '../../build/win_precompile.gypi',
  ],
  'targets': [
    {
      # The Compact Encoding Detection library, compiled from the upstream
      # sources checked out under src/.
      'target_name': 'ced',
      'type': 'static_library',
      'include_dirs': [
        'src',
      ],
      'sources': [
        'src/compact_enc_det/compact_enc_det.cc',
        'src/compact_enc_det/compact_enc_det.h',
        'src/compact_enc_det/compact_enc_det_generated_tables.h',
        'src/compact_enc_det/compact_enc_det_generated_tables2.h',
        'src/compact_enc_det/compact_enc_det_hint_code.cc',
        'src/compact_enc_det/compact_enc_det_hint_code.h',
        'src/compact_enc_det/detail_head_string.inc',
        'src/util/basictypes.h',
        'src/util/build_config.h',
        'src/util/commandlineflags.h',
        'src/util/encodings/encodings.cc',
        'src/util/encodings/encodings.h',
        'src/util/encodings/encodings.pb.h',
        'src/util/languages/languages.cc',
        'src/util/languages/languages.h',
        'src/util/languages/languages.pb.h',
        'src/util/logging.h',
        'src/util/port.h',
        'src/util/string_util.h',
        'src/util/varsetter.h',
      ],
      # Dependents need the same include root to use the CED headers.
      'direct_dependent_settings': {
        'include_dirs': [
          'src',
        ],
      },
      'conditions': [
        ['OS=="win"', {
          # direct_dependent_settings only reach targets that depend on this
          # one, so the compiler-family macro is also defined for this
          # target's own sources, matching the GN build of this library.
          'defines': [
            'COMPILER_MSVC',
          ],
          'direct_dependent_settings': {
            'defines': [
              'COMPILER_MSVC',
            ],
          },
          # MSVC warnings suppressed for the upstream sources.
          'msvs_disabled_warnings': [4005, 4006, 4244, 4309, 4800, 4267],
        }, {
          # Non-Windows toolchains: same reasoning as the Windows branch.
          'defines': [
            'COMPILER_GCC',
          ],
          'direct_dependent_settings': {
            'defines': [
              'COMPILER_GCC',
            ],
          },
        }],
      ],
    },
    {
      # Test executable built from the upstream unit and fuzz test sources.
      # The COMPILER_* define and the 'src' include path arrive through the
      # direct_dependent_settings of 'ced'.
      'target_name': 'ced_unittests',
      'type': 'executable',
      'dependencies': [
        '<(DEPTH)/testing/gtest.gyp:gtest',
        '<(DEPTH)/testing/gtest.gyp:gtest_main',
        'ced',
      ],
      'include_dirs': [
        '<(DEPTH)',
      ],
      'sources': [
        'src/compact_enc_det/compact_enc_det_fuzz_test.cc',
        'src/compact_enc_det/compact_enc_det_unittest.cc',
      ],
      'conditions': [
        ['OS=="win"', {
          # Same suppressions the GN ced_unittests target applies:
          # 4310 (cast truncates constant value), 4267 (size_t -> int).
          'msvs_disabled_warnings': [4310, 4267],
        }],
      ],
    },
  ],
}

0 comments on commit 21d3369

Please sign in to comment.