forked from mozillazg/go-pinyin
-
Notifications
You must be signed in to change notification settings - Fork 1
/
paragraph.go
75 lines (68 loc) · 1.54 KB
/
paragraph.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package pinyin
import (
"regexp"
"strings"
"unicode"
)
var (
splacesRegexp = regexp.MustCompile(`[\s]+`)
allowCharsRegexp = regexp.MustCompile(`[a-zA-Z0-9\.,\?\!;\(\)\[\]\&\=\-_@\s]`)
hansSymbols = map[string]string{
"?": "?",
"!": "!",
":": ":",
"。": ".",
",": ",",
";": ";",
"(": "(",
")": ")",
"【": "[",
"】": "]",
}
paragraphOption = Args{
Style: NORMAL,
Heteronym: true,
}
)
// paragraphSpacing removes the single padding space that Paragraph leaves
// before , . ? ; ! ) ] : and after ( [. It is applied after whitespace runs
// have been collapsed to one space, so a single left-to-right pass removes
// the same spaces as replacing each pattern in turn.
var paragraphSpacing = strings.NewReplacer(
	" ,", ",",
	" .", ".",
	" ?", "?",
	" ;", ";",
	" !", "!",
	"( ", "(",
	" )", ")",
	"[ ", "[",
	" ]", "]",
	" :", ":",
)

// Paragraph converts a Chinese paragraph into pinyin, keeping letters,
// numbers and supported symbols.
//
// The input is first passed through pinyinPhrase. Then, rune by rune:
//   - a Han character is replaced by the first reading Pinyin returns for it,
//     padded with a space on each side; characters with no reading are dropped;
//   - a character matched by allowCharsRegexp is copied unchanged;
//   - a character found in hansSymbols is replaced by its mapped value;
//   - anything else is dropped.
//
// Finally whitespace runs are collapsed to one space, spaces next to
// punctuation and brackets are removed, and the result is trimmed.
func Paragraph(p string) (s string) {
	p = pinyinPhrase(p)

	// Accumulate in a Builder; appending to a string per rune is quadratic.
	var b strings.Builder
	b.Grow(len(p))

	for _, r := range p {
		if unicode.Is(unicode.Han, r) {
			result := Pinyin(string(r), paragraphOption)
			if len(result) == 0 || len(result[0]) == 0 {
				continue
			}
			// Pad both sides so adjacent syllables stay separated; surplus
			// spaces are cleaned up below.
			b.WriteByte(' ')
			b.WriteString(string(result[0][0]))
			b.WriteByte(' ')
			continue
		}

		char := string(r)
		if allowCharsRegexp.MatchString(char) {
			b.WriteString(char)
		} else if mapped, ok := hansSymbols[char]; ok {
			b.WriteString(mapped)
		}
	}

	// Collapse every run of whitespace into a single space.
	s = splacesRegexp.ReplaceAllString(b.String(), " ")
	// Drop the space before closing punctuation and after opening brackets.
	s = paragraphSpacing.Replace(s)
	return strings.TrimSpace(s)
}