Skip to content

Commit 8acf7ef

Browse files
committed
update chardet
1 parent faeab6d commit 8acf7ef

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

chardet/chardet.charamel.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//go:build charamel
2+
3+
package chardet
4+
5+
import (
6+
"errors"
7+
8+
"github.com/gonejack/charamel"
9+
)
10+
11+
func init() {
12+
prefer(DetectEncodingByCharamel)
13+
}
14+
15+
var encodings = []charamel.Encoding{
16+
// UTF编码
17+
charamel.UTF8,
18+
charamel.UTF16,
19+
charamel.UTF16BE,
20+
charamel.UTF16LE,
21+
charamel.UTF32,
22+
charamel.UTF32BE,
23+
charamel.UTF32LE,
24+
25+
// ASCII
26+
charamel.ASCII,
27+
28+
// 西欧编码 (ISO-8859)
29+
charamel.LATIN1, // ISO-8859-1
30+
charamel.ISO88592, // ISO-8859-2
31+
charamel.ISO88593, // ISO-8859-3
32+
charamel.ISO88594, // ISO-8859-4
33+
charamel.ISO88595, // ISO-8859-5
34+
charamel.ISO88596, // ISO-8859-6
35+
charamel.ISO88597, // ISO-8859-7
36+
charamel.ISO88598, // ISO-8859-8
37+
charamel.ISO88599, // ISO-8859-9
38+
charamel.ISO885910, // ISO-8859-10
39+
charamel.ISO885911, // ISO-8859-11
40+
charamel.ISO885913, // ISO-8859-13
41+
charamel.ISO885914, // ISO-8859-14
42+
charamel.ISO885915, // ISO-8859-15
43+
charamel.ISO885916, // ISO-8859-16
44+
45+
// Windows编码 (CP1250-1258)
46+
charamel.CP1250,
47+
charamel.CP1251,
48+
charamel.CP1252,
49+
charamel.CP1253,
50+
charamel.CP1254,
51+
charamel.CP1255,
52+
charamel.CP1256,
53+
charamel.CP1257,
54+
charamel.CP1258,
55+
56+
// 中文编码
57+
charamel.GB2312,
58+
charamel.GBK,
59+
charamel.GB18030,
60+
charamel.BIG5,
61+
charamel.BIG5HKSCS,
62+
charamel.HZ, // hz-gb-2312
63+
64+
// 日文编码
65+
charamel.EUCJP, // euc-jp
66+
charamel.SHIFTJIS, // shift_jis
67+
charamel.ISO2022JP, // iso-2022-jp
68+
69+
// 韩文编码
70+
charamel.EUCKR, // euc-kr
71+
charamel.CP949, // windows-949
72+
charamel.ISO2022KR, // iso-2022-kr
73+
74+
// 俄文编码
75+
charamel.KOI8R, // koi8-r
76+
charamel.KOI8U, // koi8-u
77+
charamel.CP866, // cp866
78+
79+
// 泰文编码
80+
charamel.TIS620, // tis-620
81+
charamel.CP874, // windows-874
82+
83+
// Mac编码
84+
charamel.MACROMAN, // macintosh
85+
charamel.MACCYRILLIC, // x-mac-cyrillic
86+
}
87+
88+
func DetectEncodingByCharamel(dat []byte) (string, error) {
89+
d, err := charamel.NewDetector(encodings, 0.9)
90+
if err != nil {
91+
return "", err
92+
}
93+
v := d.Detect(dat)
94+
if v == nil {
95+
return "", errors.New("detect failed by github.com/gonejack/charamel")
96+
}
97+
return v.String(), nil
98+
}

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ require (
88
github.com/alecthomas/kong v0.8.1
99
github.com/endeveit/enca v0.0.0-20160315071803-00fe968221ab
1010
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f
11+
github.com/gonejack/charamel v1.0.1
1112
github.com/wlynxg/chardet v1.0.0
1213
golang.org/x/text v0.14.0
1314
)
1415

1516
require (
17+
github.com/x448/float16 v0.8.4 // indirect
1618
go.uber.org/multierr v1.11.0 // indirect
1719
go.uber.org/zap v1.27.0 // indirect
1820
)

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ github.com/endeveit/enca v0.0.0-20160315071803-00fe968221ab h1:8sh8Pynho3gYrdzdb
1010
github.com/endeveit/enca v0.0.0-20160315071803-00fe968221ab/go.mod h1:p9sYlSrwy19GJyed1EXDwdZeL4rVBd1tPoPgDvs7U1Q=
1111
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs=
1212
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
13+
github.com/gonejack/charamel v1.0.1 h1:TKBCkAl0PoI+0s6jJJMRoLl2rLhuHL+BPcXvCCsa6qo=
14+
github.com/gonejack/charamel v1.0.1/go.mod h1:RQJBTqDLll8x8xAJvJAFhQSoOphw8NKZ+paIcDS0aLk=
1315
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
1416
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
1517
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -18,6 +20,8 @@ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKs
1820
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
1921
github.com/wlynxg/chardet v1.0.0 h1:2gEgdmy/at4xIC+mOfNf1OFsb4LtnG9IcumfRXii/d0=
2022
github.com/wlynxg/chardet v1.0.0/go.mod h1:DgEUcneT6QieJ9qEhtRFOHWOjSNLPAo8lwUhjNopcFE=
23+
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
24+
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
2125
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
2226
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
2327
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=

0 commit comments

Comments
 (0)