|
| 1 | +package chardet |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "runtime" |
| 6 | + "unsafe" |
| 7 | + |
| 8 | + "github.com/ebitengine/purego" |
| 9 | +) |
| 10 | + |
| 11 | +func DetectEncodingByUChardetDylib(dat []byte) (string, error) { |
| 12 | + if lib == 0 { |
| 13 | + return "", errors.New("no uchardet dylib found") |
| 14 | + } |
| 15 | + dec := NewChardet() |
| 16 | + defer dec.Release() |
| 17 | + if dec.Handle(dat) == 0 { |
| 18 | + if v := dec.End(); v > "" { |
| 19 | + return v, nil |
| 20 | + } |
| 21 | + } |
| 22 | + return "", errors.New("detect failed by uchardet") |
| 23 | +} |
| 24 | + |
// Library handle and the Go function variables that init binds to the
// exported uchardet C symbols through purego.RegisterLibFunc.
var (
	// lib receives the handle returned by purego.Dlopen in init.
	// DetectEncodingByUChardetDylib treats a zero value as "library unavailable".
	lib uintptr
	// uchardetNew is bound to "uchardet_new"; NewChardet stores its result as the detector pointer.
	uchardetNew func() unsafe.Pointer
	// uchardetDelete is bound to "uchardet_delete"; Release calls it with the detector pointer.
	uchardetDelete func(det unsafe.Pointer)
	// uchardetHandleData is bound to "uchardet_handle_data"; Handle passes it a
	// pointer to the first byte of the buffer and the buffer length.
	// NOTE(review): the result is declared as Go int; if the C function returns
	// a 32-bit int the upper bits of the value may not be meaningful — confirm.
	uchardetHandleData func(det unsafe.Pointer, data unsafe.Pointer, len uintptr) int
	// uchardetDataEnd is bound to "uchardet_data_end"; End calls it before reading the charset.
	uchardetDataEnd func(det unsafe.Pointer)
	// uchardetGetCharset is bound to "uchardet_get_charset".
	uchardetGetCharset func(det unsafe.Pointer) *byte // returns a C string (char*)
	// uchardetReset is bound to "uchardet_reset"; nothing in the visible code calls it.
	uchardetReset func(det unsafe.Pointer)
)
| 34 | + |
| 35 | +func init() { |
| 36 | + var err error |
| 37 | + var name = uchardetLib() |
| 38 | + lib, err = purego.Dlopen(name, purego.RTLD_NOW|purego.RTLD_GLOBAL) |
| 39 | + if err != nil { |
| 40 | + return |
| 41 | + } |
| 42 | + purego.RegisterLibFunc(&uchardetNew, lib, "uchardet_new") |
| 43 | + purego.RegisterLibFunc(&uchardetDelete, lib, "uchardet_delete") |
| 44 | + purego.RegisterLibFunc(&uchardetHandleData, lib, "uchardet_handle_data") |
| 45 | + purego.RegisterLibFunc(&uchardetDataEnd, lib, "uchardet_data_end") |
| 46 | + purego.RegisterLibFunc(&uchardetGetCharset, lib, "uchardet_get_charset") |
| 47 | + purego.RegisterLibFunc(&uchardetReset, lib, "uchardet_reset") |
| 48 | +} |
| 49 | + |
// Chardet wraps one detector instance created through uchardetNew.
// Call Release when done so the detector is passed to uchardetDelete.
type Chardet struct {
	// det is the opaque detector pointer returned by uchardetNew;
	// Release resets it to nil after deleting the detector.
	det unsafe.Pointer
}
| 53 | + |
| 54 | +func NewChardet() *Chardet { |
| 55 | + if uchardetNew == nil { |
| 56 | + return nil |
| 57 | + } |
| 58 | + return &Chardet{ |
| 59 | + det: uchardetNew(), |
| 60 | + } |
| 61 | +} |
| 62 | +func (c *Chardet) Release() { |
| 63 | + if c.det != nil { |
| 64 | + uchardetDelete(c.det) |
| 65 | + c.det = nil |
| 66 | + } |
| 67 | +} |
| 68 | +func (c *Chardet) Handle(buf []byte) int { |
| 69 | + if c.det == nil || len(buf) == 0 { |
| 70 | + return -1 // 或其他错误指示 |
| 71 | + } |
| 72 | + dataPtr := unsafe.Pointer(&buf[0]) |
| 73 | + dlen := uintptr(len(buf)) |
| 74 | + return uchardetHandleData(c.det, dataPtr, dlen) |
| 75 | +} |
| 76 | +func (c *Chardet) End() string { |
| 77 | + uchardetDataEnd(c.det) |
| 78 | + cString := uchardetGetCharset(c.det) |
| 79 | + return cstrToString(cString) |
| 80 | +} |
| 81 | + |
// uchardetLib returns the library name or path to load for the current
// runtime.GOOS, or "" when no name is known for the platform.
//
// NOTE(review): the darwin entry is an absolute path under /opt/homebrew;
// installations under a different prefix would not be found — confirm whether
// that is intended.
func uchardetLib() string {
	byOS := map[string]string{
		"darwin": "/opt/homebrew/lib/libuchardet.dylib",
		"linux":  "libuchardet.so",
	}
	// A missing key yields the zero value "", matching the unsupported case.
	return byOS[runtime.GOOS]
}
// cstrToSlice returns a byte slice over the NUL-terminated C string starting
// at cptr, excluding the terminator. The slice refers to the same memory as
// cptr (no copy is made). A nil pointer yields a nil slice.
func cstrToSlice(cptr *byte) []byte {
	if cptr == nil {
		return nil
	}
	base := unsafe.Pointer(cptr)
	n := 0
	// Scan forward byte by byte until the terminating NUL is found.
	for *(*byte)(unsafe.Add(base, n)) != 0 {
		n++
	}
	return unsafe.Slice(cptr, n)
}
| 102 | +func cstrToString(cptr *byte) string { |
| 103 | + return string(cstrToSlice(cptr)) |
| 104 | +} |
0 commit comments