forked from Pissandshittium/pissandshittium
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial repo setup for third party library CED
Initializes src/third_party/ced for CED, an open-source library used to detect text encoding automatically. This library is intended to replace ICU, which is currently used in Blink for the automatic text encoding detection feature. The feature, when enabled, helps web pages that come without an encoding label display as expected instead of showing gibberish. The feature is disabled by default despite its usefulness, since it slows down page loading for those unlabeled pages (about 2% of all pages) by 20~30% in some cases. It is left to users to turn it on via the UI. CED is used extensively at Google, and is a better alternative in terms of accuracy, speed, size, etc. Experiments show that CED introduces no significant load. With CED in place, it is possible that automatic encoding detection can be turned on by default without introducing a regression in page loading time. This will be particularly beneficial on mobile platforms like Android, where the UI has more limitations. It will just work without individual users having to turn it on. BUG=597488 Review-Url: https://codereview.chromium.org/2047993002 Cr-Commit-Position: refs/heads/master@{#400906}
- Loading branch information
Showing
7 changed files
with
199 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Copyright 2016 The Chromium Authors. All rights reserved. | ||
# Use of this source code is governed by a BSD-style license that can be | ||
# found in the LICENSE file. | ||
|
||
import("//testing/test.gni") | ||
|
||
# Build settings for CED: puts "src" on the include path and, when building | ||
# with clang, passes -Wno-unused-function. | ||
config("ced_config") { | ||
include_dirs = [ "src" ] | ||
if (is_clang) { | ||
cflags = [ "-Wno-unused-function" ] | ||
} | ||
} | ||
|
||
# The CED library target: the compact_enc_det sources plus the util headers | ||
# and sources they are listed with, all under src/. | ||
source_set("ced") { | ||
sources = [ | ||
"src/compact_enc_det/compact_enc_det.cc", | ||
"src/compact_enc_det/compact_enc_det.h", | ||
"src/compact_enc_det/compact_enc_det_generated_tables.h", | ||
"src/compact_enc_det/compact_enc_det_generated_tables2.h", | ||
"src/compact_enc_det/compact_enc_det_hint_code.cc", | ||
"src/compact_enc_det/compact_enc_det_hint_code.h", | ||
"src/util/basictypes.h", | ||
"src/util/build_config.h", | ||
"src/util/commandlineflags.h", | ||
"src/util/encodings/encodings.cc", | ||
"src/util/encodings/encodings.h", | ||
"src/util/encodings/encodings.pb.h", | ||
"src/util/languages/languages.cc", | ||
"src/util/languages/languages.h", | ||
"src/util/languages/languages.pb.h", | ||
"src/util/logging.h", | ||
"src/util/port.h", | ||
"src/util/string_util.h", | ||
"src/util/varsetter.h", | ||
] | ||
|
||
# Replace the chromium_code compiler config with no_chromium_code for this | ||
# target (presumably because this is third-party code -- confirm). | ||
configs -= [ "//build/config/compiler:chromium_code" ] | ||
configs += [ "//build/config/compiler:no_chromium_code" ] | ||
|
||
# ced_config is listed as a public config of this target. | ||
public_configs = [ ":ced_config" ] | ||
|
||
# Pick the compiler-identification define: COMPILER_MSVC on Windows (where | ||
# the MSVC warnings below are also disabled), COMPILER_GCC everywhere else. | ||
if (is_win) { | ||
defines = [ "COMPILER_MSVC" ] | ||
cflags = [ | ||
"/wd4005", # Macro defined twice. | ||
"/wd4006", # #undef expected an identifier. | ||
"/wd4309", # Truncation of constant value. | ||
] | ||
} else { | ||
defines = [ "COMPILER_GCC" ] | ||
} | ||
} | ||
|
||
# Unit test target for CED: builds the fuzz test and unit test sources and | ||
# depends on the ":ced" library target plus gtest and gtest_main. | ||
test("ced_unittests") { | ||
sources = [ | ||
"src/compact_enc_det/compact_enc_det_fuzz_test.cc", | ||
"src/compact_enc_det/compact_enc_det_unittest.cc", | ||
"src/compact_enc_det/detail_head_string.inc", | ||
] | ||
|
||
# Same COMPILER_MSVC / COMPILER_GCC selection as the ":ced" target, with a | ||
# test-specific set of disabled MSVC warnings on Windows. | ||
if (is_win) { | ||
defines = [ "COMPILER_MSVC" ] | ||
cflags = [ | ||
"/wd4310", # Cast truncates constant value. | ||
"/wd4267", # size_t -> int | ||
] | ||
} else { | ||
defines = [ "COMPILER_GCC" ] | ||
} | ||
|
||
deps = [ | ||
":ced", | ||
"//testing/gtest", | ||
"//testing/gtest:gtest_main", | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Copyright (c) 2010 The Chromium Authors. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions are | ||
// met: | ||
// | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above | ||
// copyright notice, this list of conditions and the following disclaimer | ||
// in the documentation and/or other materials provided with the | ||
// distribution. | ||
// * Neither the name of Google Inc. nor the names of its | ||
// contributors may be used to endorse or promote products derived from | ||
// this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
jinsukkim@chromium.org | ||
jshin@chromium.org |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Name: Compact Encoding Detection | ||
Short Name: ced | ||
URL: https://github.com/google/compact_enc_det | ||
Version: 2f40a850bcc5d6f7c1bfa02dbf42ad19d8220dc0 | ||
License: Apache 2.0 | ||
License File: LICENSE | ||
Security Critical: no | ||
|
||
Description: | ||
Compact Encoding Detection (CED for short) is a library written in C++ that | ||
scans given raw bytes and detects the most likely text encoding. | ||
|
||
Local Modifications: None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Copyright (c) 2016 The Chromium Authors. All rights reserved. | ||
# Use of this source code is governed by a BSD-style license that can be | ||
# found in the LICENSE file. | ||
|
||
{ | ||
'includes': [ | ||
'../../build/win_precompile.gypi', | ||
], | ||
'targets': [ | ||
# Static library target for CED, built from the sources under src/. | ||
{ | ||
'target_name': 'ced', | ||
'type': 'static_library', | ||
'include_dirs': [ | ||
'src', | ||
], | ||
'sources': [ | ||
"src/compact_enc_det/compact_enc_det.cc", | ||
"src/compact_enc_det/compact_enc_det.h", | ||
"src/compact_enc_det/compact_enc_det_generated_tables.h", | ||
"src/compact_enc_det/compact_enc_det_generated_tables2.h", | ||
"src/compact_enc_det/compact_enc_det_hint_code.cc", | ||
"src/compact_enc_det/compact_enc_det_hint_code.h", | ||
"src/compact_enc_det/detail_head_string.inc", | ||
"src/util/basictypes.h", | ||
"src/util/build_config.h", | ||
"src/util/commandlineflags.h", | ||
"src/util/encodings/encodings.cc", | ||
"src/util/encodings/encodings.h", | ||
"src/util/encodings/encodings.pb.h", | ||
"src/util/languages/languages.cc", | ||
"src/util/languages/languages.h", | ||
"src/util/languages/languages.pb.h", | ||
"src/util/logging.h", | ||
"src/util/port.h", | ||
"src/util/string_util.h", | ||
"src/util/varsetter.h", | ||
], | ||
# Settings passed on to targets that depend directly on 'ced': they get | ||
# 'src' on their include path as well. | ||
'direct_dependent_settings': { | ||
'include_dirs': [ | ||
'src', | ||
], | ||
}, | ||
# Compiler-identification define: COMPILER_MSVC on Windows, COMPILER_GCC | ||
# otherwise. | ||
# NOTE(review): both defines appear only under direct_dependent_settings, | ||
# not in this target's own 'defines' -- confirm the library sources do not | ||
# need them when 'ced' itself is compiled. | ||
'conditions': [ | ||
['OS=="win"', { | ||
'direct_dependent_settings': { | ||
'defines': [ | ||
'COMPILER_MSVC', | ||
], | ||
}, | ||
# MSVC warning numbers disabled for this target on Windows. | ||
'msvs_disabled_warnings': [4005, 4006, 4244, 4309, 4800, 4267], | ||
}, { | ||
'direct_dependent_settings': { | ||
'defines': [ | ||
'COMPILER_GCC', | ||
], | ||
}, | ||
}], | ||
], | ||
}, | ||
# Unit test executable for CED: builds the fuzz test and unit test sources | ||
# and depends on gtest, gtest_main and the 'ced' target. | ||
{ | ||
'target_name': 'ced_unittests', | ||
'type': 'executable', | ||
'dependencies': [ | ||
'<(DEPTH)/testing/gtest.gyp:gtest', | ||
'<(DEPTH)/testing/gtest.gyp:gtest_main', | ||
'ced', | ||
], | ||
# <(DEPTH) is added to the include path for this target. | ||
'include_dirs': [ | ||
'<(DEPTH)', | ||
], | ||
'sources': [ | ||
"src/compact_enc_det/compact_enc_det_fuzz_test.cc", | ||
"src/compact_enc_det/compact_enc_det_unittest.cc", | ||
], | ||
}, | ||
], | ||
} |