Skip to content

Commit

Permalink
Using juniversalchardet
Browse files Browse the repository at this point in the history
  • Loading branch information
M66B committed Oct 10, 2020
1 parent 5707f8b commit 615a006
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 9 deletions.
1 change: 1 addition & 0 deletions ATTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ FairEmail uses:
* [GPX file type icon](https://www.flaticon.com/free-icon/gpx-file-format-variant_29258) made by [Freepik](https://www.flaticon.com/authors/freepik) from [Flaticon](https://www.flaticon.com).
* [Disconnect's tracker protection lists](https://github.com/disconnectme/disconnect-tracking-protection). Copyright 2010-2020 Disconnect, Inc. [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license](https://github.com/disconnectme/disconnect-tracking-protection/blob/master/LICENSE).
* [Over-Scroll Support For Android's RecyclerView, ListView, GridView, ScrollView ...](https://github.com/EverythingMe/overscroll-decor). Copyright (c) 2015, DoAT Media Ltd. [BSD-2-Clause License](https://github.com/EverythingMe/overscroll-decor/blob/master/LICENSE).
* [juniversalchardet](https://github.com/albfernandez/juniversalchardet). Copyright (C) 2001 the Initial Developer. All Rights Reserved. [GNU General Public License Version 2](https://github.com/albfernandez/juniversalchardet#license).
5 changes: 5 additions & 0 deletions app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ dependencies {
def overscroll_version = "1.1.0"
def appauth_version = "0.7.1"
def jcharset_version = "2.1"
def jchardet_version = "2.3.2"

// https://developer.android.com/jetpack/androidx/releases/

Expand Down Expand Up @@ -395,4 +396,8 @@ dependencies {
// http://www.freeutils.net/source/jcharset/
// https://mvnrepository.com/artifact/net.freeutils/jcharset
implementation "net.freeutils:jcharset:$jcharset_version"

// https://github.com/albfernandez/juniversalchardet
// https://mvnrepository.com/artifact/com.github.albfernandez/juniversalchardet
implementation "com.github.albfernandez:juniversalchardet:$jchardet_version"
}
1 change: 1 addition & 0 deletions app/src/main/assets/ATTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ FairEmail uses:
* [GPX file type icon](https://www.flaticon.com/free-icon/gpx-file-format-variant_29258) made by [Freepik](https://www.flaticon.com/authors/freepik) from [Flaticon](https://www.flaticon.com).
* [Disconnect's tracker protection lists](https://github.com/disconnectme/disconnect-tracking-protection). Copyright 2010-2020 Disconnect, Inc. [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license](https://github.com/disconnectme/disconnect-tracking-protection/blob/master/LICENSE).
* [Over-Scroll Support For Android's RecyclerView, ListView, GridView, ScrollView ...](https://github.com/EverythingMe/overscroll-decor). Copyright (c) 2015, DoAT Media Ltd. [BSD-2-Clause License](https://github.com/EverythingMe/overscroll-decor/blob/master/LICENSE).
* [juniversalchardet](https://github.com/albfernandez/juniversalchardet). Copyright (C) 2001 the Initial Developer. All Rights Reserved. [GNU General Public License Version 2](https://github.com/albfernandez/juniversalchardet#license).
26 changes: 26 additions & 0 deletions app/src/main/java/eu/faircode/email/CharsetHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@
Copyright 2018-2020 by Marcel Bokhorst (M66B)
*/

import org.mozilla.universalchardet.UniversalDetector;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;

class CharsetHelper {
private static UniversalDetector detector = new UniversalDetector();

private static final int SAMPLE_SIZE = 2 * 1024;

static boolean isUTF8(String text) {
// Get extended ASCII characters
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
Expand Down Expand Up @@ -115,4 +121,24 @@ else if (c == '$')

return false;
}

/**
 * Guesses the character set of a text whose characters each stand for one raw octet.
 *
 * The text is converted back to bytes using ISO-8859-1 and at most the first
 * SAMPLE_SIZE bytes are handed to a juniversalchardet detector.
 *
 * @param text the text to inspect; the visible caller passes content that it later
 *             re-decodes from ISO-8859-1 bytes
 * @return the detected charset, or null when the detector reports no charset or when
 *         anything goes wrong (for example a null text or a charset name that this
 *         runtime does not support); failures are logged as warnings
 */
static Charset detect(String text) {
    try {
        // Use a detector confined to this call instead of one shared static instance:
        // the detector accumulates state between handleData() and reset(), so calls
        // overlapping on different threads would otherwise mix their samples or reset
        // each other halfway through a detection.
        UniversalDetector chardet = new UniversalDetector();

        // Recover the original octets (one char per byte)
        byte[] sample = text.getBytes(StandardCharsets.ISO_8859_1);

        // Only the head of the content is examined to bound the work
        chardet.handleData(sample, 0, Math.min(SAMPLE_SIZE, sample.length));
        chardet.dataEnd();

        String detected = chardet.getDetectedCharset();
        if (detected == null)
            return null;

        return Charset.forName(detected);
    } catch (Throwable ex) {
        // Best effort: detection is optional, so never let a failure escape
        Log.w(ex);
        return null;
    }
}
}
24 changes: 15 additions & 9 deletions app/src/main/java/eu/faircode/email/MessageHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -1729,22 +1729,28 @@ else if (content instanceof InputStream)
if (UnknownCharsetProvider.charsetForMime(charset) == null)
warnings.add(context.getString(R.string.title_no_charset, charset));

if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())))
charset = null;

if (part.isMimeType("text/plain")) {
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
else if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
CharsetHelper.isUTF8(result)) {
Log.i("Charset plain=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
if (charset == null) {
Charset detected = CharsetHelper.detect(result);
if (detected == null) {
if (CharsetHelper.isUTF8(result)) {
Log.i("Charset plain=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}
} else {
Log.i("Charset plain=" + detected.name());
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), detected);
}
}

if ("flowed".equalsIgnoreCase(ct.getParameter("format")))
result = HtmlHelper.flow(result);
result = "<div x-plain=\"true\">" + HtmlHelper.formatPre(result) + "</div>";
} else if (part.isMimeType("text/html")) {
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
else if (TextUtils.isEmpty(charset)) {
if (charset == null) {
// <meta charset="utf-8" />
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
Expand Down

0 comments on commit 615a006

Please sign in to comment.