|
| 1 | +//go:build charamel |
| 2 | + |
| 3 | +package chardet |
| 4 | + |
| 5 | +import ( |
| 6 | + "errors" |
| 7 | + |
| 8 | + "github.com/gonejack/charamel" |
| 9 | +) |
| 10 | + |
| 11 | +func init() { |
| 12 | + prefer(DetectEncodingByCharamel) |
| 13 | +} |
| 14 | + |
| 15 | +var encodings = []charamel.Encoding{ |
| 16 | + // UTF编码 |
| 17 | + charamel.UTF8, |
| 18 | + charamel.UTF16, |
| 19 | + charamel.UTF16BE, |
| 20 | + charamel.UTF16LE, |
| 21 | + charamel.UTF32, |
| 22 | + charamel.UTF32BE, |
| 23 | + charamel.UTF32LE, |
| 24 | + |
| 25 | + // ASCII |
| 26 | + charamel.ASCII, |
| 27 | + |
| 28 | + // 西欧编码 (ISO-8859) |
| 29 | + charamel.LATIN1, // ISO-8859-1 |
| 30 | + charamel.ISO88592, // ISO-8859-2 |
| 31 | + charamel.ISO88593, // ISO-8859-3 |
| 32 | + charamel.ISO88594, // ISO-8859-4 |
| 33 | + charamel.ISO88595, // ISO-8859-5 |
| 34 | + charamel.ISO88596, // ISO-8859-6 |
| 35 | + charamel.ISO88597, // ISO-8859-7 |
| 36 | + charamel.ISO88598, // ISO-8859-8 |
| 37 | + charamel.ISO88599, // ISO-8859-9 |
| 38 | + charamel.ISO885910, // ISO-8859-10 |
| 39 | + charamel.ISO885911, // ISO-8859-11 |
| 40 | + charamel.ISO885913, // ISO-8859-13 |
| 41 | + charamel.ISO885914, // ISO-8859-14 |
| 42 | + charamel.ISO885915, // ISO-8859-15 |
| 43 | + charamel.ISO885916, // ISO-8859-16 |
| 44 | + |
| 45 | + // Windows编码 (CP1250-1258) |
| 46 | + charamel.CP1250, |
| 47 | + charamel.CP1251, |
| 48 | + charamel.CP1252, |
| 49 | + charamel.CP1253, |
| 50 | + charamel.CP1254, |
| 51 | + charamel.CP1255, |
| 52 | + charamel.CP1256, |
| 53 | + charamel.CP1257, |
| 54 | + charamel.CP1258, |
| 55 | + |
| 56 | + // 中文编码 |
| 57 | + charamel.GB2312, |
| 58 | + charamel.GBK, |
| 59 | + charamel.GB18030, |
| 60 | + charamel.BIG5, |
| 61 | + charamel.BIG5HKSCS, |
| 62 | + charamel.HZ, // hz-gb-2312 |
| 63 | + |
| 64 | + // 日文编码 |
| 65 | + charamel.EUCJP, // euc-jp |
| 66 | + charamel.SHIFTJIS, // shift_jis |
| 67 | + charamel.ISO2022JP, // iso-2022-jp |
| 68 | + |
| 69 | + // 韩文编码 |
| 70 | + charamel.EUCKR, // euc-kr |
| 71 | + charamel.CP949, // windows-949 |
| 72 | + charamel.ISO2022KR, // iso-2022-kr |
| 73 | + |
| 74 | + // 俄文编码 |
| 75 | + charamel.KOI8R, // koi8-r |
| 76 | + charamel.KOI8U, // koi8-u |
| 77 | + charamel.CP866, // cp866 |
| 78 | + |
| 79 | + // 泰文编码 |
| 80 | + charamel.TIS620, // tis-620 |
| 81 | + charamel.CP874, // windows-874 |
| 82 | + |
| 83 | + // Mac编码 |
| 84 | + charamel.MACROMAN, // macintosh |
| 85 | + charamel.MACCYRILLIC, // x-mac-cyrillic |
| 86 | +} |
| 87 | + |
| 88 | +func DetectEncodingByCharamel(dat []byte) (string, error) { |
| 89 | + d, err := charamel.NewDetector(encodings, 0.9) |
| 90 | + if err != nil { |
| 91 | + return "", err |
| 92 | + } |
| 93 | + v := d.Detect(dat) |
| 94 | + if v == nil { |
| 95 | + return "", errors.New("detect failed by github.com/gonejack/charamel") |
| 96 | + } |
| 97 | + return v.String(), nil |
| 98 | +} |
0 commit comments