1
1
package com.flycode.languagetoolserver
2
2
3
+ import kotlinx.serialization.SerialName
3
4
import kotlinx.serialization.Serializable
4
5
import org.languagetool.JLanguageTool
5
6
import org.languagetool.language.AmericanEnglish
7
+ import org.languagetool.rules.ITSIssueType
8
+ import org.languagetool.rules.RuleMatch
6
9
7
10
object LanguageToolService {
8
11
private val langTool = JLanguageTool (AmericanEnglish ())
@@ -11,37 +14,211 @@ object LanguageToolService {
11
14
langTool.check(" Initialize" )
12
15
}
13
16
17
/**
 * Checks a single text with the shared [langTool] instance and wraps the outcome.
 *
 * Every match reported by `langTool.check` is converted with [toMatchRuleResult]; a `null`
 * match list is treated as "no matches".
 *
 * @param text the text to be checked
 * @return a [CheckResult] carrying the input text and one [RuleMatchResult] per reported match
 */
fun check(text: String): CheckResult {
    // comment in to use statistical ngram data:
    // langTool.activateLanguageModelRules(new File("/data/google-ngram-data"));
    val matches = langTool.check(text).orEmpty().map(::toMatchRuleResult)
    return CheckResult(text, matches)
}
30
34
31
35
/**
 * Checks each entry of [texts] independently by delegating to the single-text [check] overload.
 *
 * @param texts the texts to be checked
 * @return one [CheckResult] per input text, in the same order as [texts]
 */
fun check(texts: List<String>): List<CheckResult> = texts.map { entry -> check(entry) }
36
38
}
37
39
40
/**
 * Converts a LanguageTool [RuleMatch] into its serializable [RuleMatchResult] counterpart.
 *
 * The result's `length` is derived as `toPos - fromPos`, and a `null` list of suggested
 * replacements is replaced by an empty list.
 *
 * @param ruleMatch the match to convert
 * @return the serializable representation of [ruleMatch], including its rule and category
 */
fun toMatchRuleResult(ruleMatch: RuleMatch): RuleMatchResult =
    RuleMatchResult(
        message = ruleMatch.message,
        shortMessage = ruleMatch.shortMessage,
        offset = ruleMatch.fromPos,
        length = ruleMatch.toPos - ruleMatch.fromPos,
        replacements = ruleMatch.suggestedReplacements.orEmpty(),
        type = toType(ruleMatch.type),
        rule = ruleMatch.rule.let { matchedRule ->
            val ruleCategory = matchedRule.category
            RuleResult(
                id = matchedRule.id,
                subId = matchedRule.subId,
                sourceFile = matchedRule.sourceFile,
                description = matchedRule.description,
                issue = toITSIssueType(matchedRule.locQualityIssueType),
                category = CategoryResult(
                    id = ruleCategory.id.toString(),
                    name = ruleCategory.name,
                ),
            )
        },
    )
61
+
38
62
/**
 * Serializable description of one problem found in a checked text.
 *
 * @property message human-readable explanation of the error; may contain one or more corrections
 *   marked up with `<suggestion>...</suggestion>`
 * @property shortMessage shorter human-readable explanation of the error, or an empty string if
 *   no such explanation is available
 * @property offset position of the start of the error, in characters, zero-based and relative to
 *   the original input text
 * @property length the length of the error
 * @property replacements text fragments that might be an appropriate fix for the problem; one of
 *   them can be used to replace the old text
 * @property type the type of this rule match
 * @property rule the rule that produced this match
 */
@Serializable
data class RuleMatchResult(
    val message: String,
    val shortMessage: String,
    val offset: Int,
    val length: Int,
    val replacements: List<String>,
    val type: RuleMatchResult.Type,
    val rule: RuleResult,
) {
    /**
     * Match-level classification. Unlike a category, it belongs to an individual match rather
     * than to a rule, and is mainly used by clients to pick the underline color.
     *
     * Note: this is experimental and might change soon (types might be added, deleted or renamed
     * without deprecating them first).
     */
    enum class Type {
        /** Spelling errors, typically red. */
        UnknownWord,

        /** Style errors, typically light blue. */
        Hint,

        /** Other errors (including grammar), typically yellow/orange. */
        Other
    }
}
46
107
47
108
/**
 * Serializable outcome of checking one text.
 *
 * @property text the text that was checked
 * @property matches the problems found in [text]
 */
@Serializable
data class CheckResult(
    val text: String,
    val matches: List<RuleMatchResult>,
)
109
+
110
/**
 * Serializable snapshot of the rule behind a match. A rule describes a language error.
 *
 * @property id string used to identify the rule in e.g. configuration files; it is supposed to be
 *   unique, to stay the same in all upcoming versions of LanguageTool, and to contain only the
 *   characters A-Z and the underscore
 * @property subId optional sub-identifier, mostly used for XML rules
 * @property sourceFile optional; for XML rules, the file the rule was loaded from
 * @property description short description of the error this rule can detect, usually in the
 *   language of the text that is checked
 * @property issue the ITS issue type of the rule
 * @property category the category the rule belongs to
 */
@Serializable
data class RuleResult(
    val id: String,
    val subId: String?,
    val sourceFile: String?,
    val description: String,
    val issue: ITSIssueTypeResult,
    val category: CategoryResult,
)
144
+
145
/**
 * Serializable snapshot of a rule's category.
 *
 * @property id the category identifier, in string form
 * @property name the category name
 */
@Serializable
data class CategoryResult(val id: String, val name: String)
150
+
151
/**
 * Maps LanguageTool's [RuleMatch.Type] onto the serializable [RuleMatchResult.Type].
 *
 * The parameter is nullable because the value crosses the Java boundary. A `null` input is
 * mapped to [RuleMatchResult.Type.Other] instead of failing with a `NullPointerException`, so a
 * single match without a type cannot abort an entire check response.
 *
 * @param type the match type reported by LanguageTool, possibly `null`
 * @return the corresponding [RuleMatchResult.Type]; `Other` when [type] is `null`
 */
fun toType(type: RuleMatch.Type?): RuleMatchResult.Type =
    // No `else` branch on purpose: the compiler flags any constant added to RuleMatch.Type.
    when (type ?: RuleMatch.Type.Other) {
        RuleMatch.Type.UnknownWord -> RuleMatchResult.Type.UnknownWord
        RuleMatch.Type.Hint -> RuleMatchResult.Type.Hint
        RuleMatch.Type.Other -> RuleMatchResult.Type.Other
    }
158
+
159
/**
 * Maps LanguageTool's [ITSIssueType] onto the serializable [ITSIssueTypeResult], constant by
 * constant.
 *
 * The parameter is nullable because the value crosses the Java boundary. A `null` input is
 * mapped to [ITSIssueTypeResult.Uncategorized] instead of failing with a `NullPointerException`,
 * so a rule without an issue type cannot abort an entire check response.
 *
 * @param issue the issue type reported by LanguageTool, possibly `null`
 * @return the corresponding [ITSIssueTypeResult]; `Uncategorized` when [issue] is `null`
 */
fun toITSIssueType(issue: ITSIssueType?): ITSIssueTypeResult =
    // No `else` branch on purpose: the compiler flags any constant added to ITSIssueType.
    when (issue ?: ITSIssueType.Uncategorized) {
        ITSIssueType.Terminology -> ITSIssueTypeResult.Terminology
        ITSIssueType.Mistranslation -> ITSIssueTypeResult.Mistranslation
        ITSIssueType.Omission -> ITSIssueTypeResult.Omission
        ITSIssueType.Untranslated -> ITSIssueTypeResult.Untranslated
        ITSIssueType.Addition -> ITSIssueTypeResult.Addition
        ITSIssueType.Duplication -> ITSIssueTypeResult.Duplication
        ITSIssueType.Inconsistency -> ITSIssueTypeResult.Inconsistency
        ITSIssueType.Grammar -> ITSIssueTypeResult.Grammar
        ITSIssueType.Legal -> ITSIssueTypeResult.Legal
        ITSIssueType.Register -> ITSIssueTypeResult.Register
        ITSIssueType.LocaleSpecificContent -> ITSIssueTypeResult.LocaleSpecificContent
        ITSIssueType.LocaleViolation -> ITSIssueTypeResult.LocaleViolation
        ITSIssueType.Style -> ITSIssueTypeResult.Style
        ITSIssueType.Characters -> ITSIssueTypeResult.Characters
        ITSIssueType.Misspelling -> ITSIssueTypeResult.Misspelling
        ITSIssueType.Typographical -> ITSIssueTypeResult.Typographical
        ITSIssueType.Formatting -> ITSIssueTypeResult.Formatting
        ITSIssueType.InconsistentEntities -> ITSIssueTypeResult.InconsistentEntities
        ITSIssueType.Numbers -> ITSIssueTypeResult.Numbers
        ITSIssueType.Markup -> ITSIssueTypeResult.Markup
        ITSIssueType.PatternProblem -> ITSIssueTypeResult.PatternProblem
        ITSIssueType.Whitespace -> ITSIssueTypeResult.Whitespace
        ITSIssueType.Internationalization -> ITSIssueTypeResult.Internationalization
        ITSIssueType.Length -> ITSIssueTypeResult.Length
        ITSIssueType.NonConformance -> ITSIssueTypeResult.NonConformance
        ITSIssueType.Uncategorized -> ITSIssueTypeResult.Uncategorized
        ITSIssueType.Other -> ITSIssueTypeResult.Other
    }
190
+
191
/**
 * Localization Quality Issue Types from the Internationalization Tag Set (ITS) Version 2.0.
 *
 * Each constant is serialized under the lowercase, hyphen-separated name given in its
 * [SerialName]. The names must not contain surrounding whitespace, otherwise clients matching
 * on the ITS 2.0 identifiers would not recognize them.
 */
@Serializable
enum class ITSIssueTypeResult {
    @SerialName("terminology") Terminology,
    @SerialName("mistranslation") Mistranslation,
    @SerialName("omission") Omission,
    @SerialName("untranslated") Untranslated,
    @SerialName("addition") Addition,
    @SerialName("duplication") Duplication,
    @SerialName("inconsistency") Inconsistency,
    @SerialName("grammar") Grammar,
    @SerialName("legal") Legal,
    @SerialName("register") Register,
    @SerialName("locale-specific-content") LocaleSpecificContent,
    @SerialName("locale-violation") LocaleViolation,
    @SerialName("style") Style,
    @SerialName("characters") Characters,
    @SerialName("misspelling") Misspelling,
    @SerialName("typographical") Typographical,
    @SerialName("formatting") Formatting,
    @SerialName("inconsistent-entities") InconsistentEntities,
    @SerialName("numbers") Numbers,
    @SerialName("markup") Markup,
    @SerialName("pattern-problem") PatternProblem,
    @SerialName("whitespace") Whitespace,
    @SerialName("internationalization") Internationalization,
    @SerialName("length") Length,
    @SerialName("non-conformance") NonConformance,
    @SerialName("uncategorized") Uncategorized,
    @SerialName("other") Other,
}
0 commit comments