Skip to content

Commit 14f99e7

Browse files
authored
Merge pull request #1 from flycode-org/feature/more-properties
More Properties
2 parents 33282eb + b6bce9d commit 14f99e7

File tree

1 file changed

+188
-11
lines changed

1 file changed

+188
-11
lines changed
Lines changed: 188 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package com.flycode.languagetoolserver
22

3+
import kotlinx.serialization.SerialName
34
import kotlinx.serialization.Serializable
45
import org.languagetool.JLanguageTool
56
import org.languagetool.language.AmericanEnglish
7+
import org.languagetool.rules.ITSIssueType
8+
import org.languagetool.rules.RuleMatch
69

710
object LanguageToolService {
811
private val langTool = JLanguageTool(AmericanEnglish())
@@ -11,37 +14,211 @@ object LanguageToolService {
1114
langTool.check("Initialize")
1215
}
1316

17+
/**
 * Checks a single text with the shared LanguageTool instance.
 *
 * @param text the text to be checked
 * @return a [CheckResult] holding the input text and one [RuleMatchResult] per rule match
 */
fun check(text: String): CheckResult {
    // comment in to use statistical ngram data:
    // langTool.activateLanguageModelRules(new File("/data/google-ngram-data"));

    // NOTE(review): langTool is one shared JLanguageTool instance, and LanguageTool documents
    // JLanguageTool as not thread-safe - confirm this is never called concurrently.
    // A null match list is treated as "no matches".
    val matches = (langTool.check(text) ?: emptyList()).map { match -> toMatchRuleResult(match) }
    return CheckResult(text, matches)
}
3034

3135
/**
 * Checks several texts one after another.
 *
 * @param texts the texts to be checked
 * @return one [CheckResult] per input text, in the same order as [texts]
 */
fun check(texts: List<String>): List<CheckResult> = texts.map { text -> check(text) }
3638
}
3739

40+
/**
 * Converts a LanguageTool [RuleMatch] into the serializable [RuleMatchResult].
 *
 * The match length is derived as `toPos - fromPos`. A missing short message or a missing
 * replacement list is replaced by an empty value so that the non-null result fields can
 * always be populated.
 *
 * @param ruleMatch the match reported by LanguageTool
 * @return the serializable representation of [ruleMatch], including its rule and category
 */
fun toMatchRuleResult(ruleMatch: RuleMatch): RuleMatchResult {
    val rule = ruleMatch.rule
    val category = rule.category
    return RuleMatchResult(
        message = ruleMatch.message,
        // shortMessage is a Java platform value; fall back to "" rather than failing on null.
        shortMessage = ruleMatch.shortMessage ?: "",
        offset = ruleMatch.fromPos,
        length = ruleMatch.toPos - ruleMatch.fromPos,
        replacements = ruleMatch.suggestedReplacements ?: emptyList(),
        type = toType(ruleMatch.type),
        rule = RuleResult(
            id = rule.id,
            subId = rule.subId,
            sourceFile = rule.sourceFile,
            description = rule.description,
            issue = toITSIssueType(rule.locQualityIssueType),
            category = CategoryResult(
                id = category.id.toString(),
                name = category.name,
            ),
        ),
    )
}
61+
3862
@Serializable
data class RuleMatchResult(
    /**
     * Human-readable explanation of the error. May contain one or more corrections marked up
     * with `<suggestion>...</suggestion>`.
     */
    val message: String,
    /**
     * Shorter human-readable explanation of the error, or an empty string if none is
     * available.
     */
    val shortMessage: String,
    /**
     * Start position of the error, in characters, zero-based, relative to the original input
     * text.
     */
    val offset: Int,
    /** Length of the error. */
    val length: Int,
    /**
     * Text fragments that might be an appropriate fix for the problem; one of them can be used
     * to replace the old text.
     */
    val replacements: List<String>,
    /** Type of this rule match. */
    val type: RuleMatchResult.Type,
    /** The rule that matched. */
    val rule: RuleResult
) {
    /**
     * Match-level classification. Unlike a category, it is specific to a rule match rather than
     * to a rule, and is mainly used by clients to select the underline color. Note: this is
     * experimental and might change soon (types might be added, deleted or renamed without
     * deprecating them first).
     */
    enum class Type {
        /** Spelling errors, typically red. */
        UnknownWord,

        /** Style errors, typically light blue. */
        Hint,

        /** Other errors (including grammar), typically yellow/orange. */
        Other
    }
}
46107

47108
/** Result of checking one text: the text itself plus every rule match found in it. */
@Serializable
data class CheckResult(
    /** The text that was checked. */
    val text: String,
    /** The rule matches found in [text]. */
    val matches: List<RuleMatchResult>
)
109+
110+
/**
111+
* Abstract rule class. A Rule describes a language error and can test whether a given pre-analyzed
112+
* text contains that error using the match(AnalyzedSentence) method. Rules are created whenever a
113+
* JLanguageTool or a MultiThreadedJLanguageTool object is created. As these objects are not
114+
* thread-safe, this can happen often. Rules should thus make sure that their initialization works
115+
* fast. For example, if a rule needs to load data from disk, it should store it in a static
116+
* variable to make sure the loading happens only once. Rules also need to make sure their match()
117+
* code is stateless, i.e. that its results are not influenced by previous calls to match() (this is
118+
* relevant if pipeline caching is used).
119+
*/
120+
@Serializable
121+
data class RuleResult(
122+
/**
123+
* A string used to identify the rule in e.g. configuration files. This string is supposed
124+
* to be unique and to stay the same in all upcoming versions of LanguageTool. It's supposed
125+
* to contain only the characters A-Z and the underscore.
126+
*/
127+
val id: String,
128+
/** Optional, mostly used for XML rules (pulled from there to all rules for uniformity) */
129+
val subId: String?,
130+
/**
131+
* Optional, mostly used for XML rules (pulled from there to all rules for uniformity) For
132+
* XML rules, this returns the file that this rule was loaded from
133+
*/
134+
val sourceFile: String?,
135+
/**
136+
* A short description of the error this rule can detect, usually in the language of the
137+
* text that is checked.
138+
*/
139+
val description: String,
140+
/** The ITS Issue type */
141+
val issue: ITSIssueTypeResult,
142+
val category: CategoryResult
143+
)
144+
145+
/** Serializable identifier and name of a rule category. */
@Serializable
data class CategoryResult(
    /** String form of the category id. */
    val id: String,
    /** Name of the category. */
    val name: String,
)
150+
151+
/**
 * Converts a LanguageTool [RuleMatch.Type] into the serializable [RuleMatchResult.Type].
 *
 * The `when` has no `else` branch on purpose, so a match type added by a newer LanguageTool
 * version fails compilation here instead of being mapped silently.
 *
 * @param type the match type reported by LanguageTool; `null` is mapped to
 * [RuleMatchResult.Type.Other] instead of throwing
 * @return the corresponding [RuleMatchResult.Type]
 */
fun toType(type: RuleMatch.Type?): RuleMatchResult.Type =
    when (type) {
        RuleMatch.Type.UnknownWord -> RuleMatchResult.Type.UnknownWord
        RuleMatch.Type.Hint -> RuleMatchResult.Type.Hint
        RuleMatch.Type.Other, null -> RuleMatchResult.Type.Other
    }
158+
159+
/**
 * Converts a LanguageTool [ITSIssueType] into the serializable [ITSIssueTypeResult].
 *
 * The `when` has no `else` branch on purpose, so an issue type added by a newer LanguageTool
 * version fails compilation here instead of being mapped silently.
 *
 * @param issue the issue type reported by LanguageTool; `null` is mapped to
 * [ITSIssueTypeResult.Uncategorized] instead of throwing
 * @return the corresponding [ITSIssueTypeResult]
 */
fun toITSIssueType(issue: ITSIssueType?): ITSIssueTypeResult =
    when (issue) {
        ITSIssueType.Terminology -> ITSIssueTypeResult.Terminology
        ITSIssueType.Mistranslation -> ITSIssueTypeResult.Mistranslation
        ITSIssueType.Omission -> ITSIssueTypeResult.Omission
        ITSIssueType.Untranslated -> ITSIssueTypeResult.Untranslated
        ITSIssueType.Addition -> ITSIssueTypeResult.Addition
        ITSIssueType.Duplication -> ITSIssueTypeResult.Duplication
        ITSIssueType.Inconsistency -> ITSIssueTypeResult.Inconsistency
        ITSIssueType.Grammar -> ITSIssueTypeResult.Grammar
        ITSIssueType.Legal -> ITSIssueTypeResult.Legal
        ITSIssueType.Register -> ITSIssueTypeResult.Register
        ITSIssueType.LocaleSpecificContent -> ITSIssueTypeResult.LocaleSpecificContent
        ITSIssueType.LocaleViolation -> ITSIssueTypeResult.LocaleViolation
        ITSIssueType.Style -> ITSIssueTypeResult.Style
        ITSIssueType.Characters -> ITSIssueTypeResult.Characters
        ITSIssueType.Misspelling -> ITSIssueTypeResult.Misspelling
        ITSIssueType.Typographical -> ITSIssueTypeResult.Typographical
        ITSIssueType.Formatting -> ITSIssueTypeResult.Formatting
        ITSIssueType.InconsistentEntities -> ITSIssueTypeResult.InconsistentEntities
        ITSIssueType.Numbers -> ITSIssueTypeResult.Numbers
        ITSIssueType.Markup -> ITSIssueTypeResult.Markup
        ITSIssueType.PatternProblem -> ITSIssueTypeResult.PatternProblem
        ITSIssueType.Whitespace -> ITSIssueTypeResult.Whitespace
        ITSIssueType.Internationalization -> ITSIssueTypeResult.Internationalization
        ITSIssueType.Length -> ITSIssueTypeResult.Length
        ITSIssueType.NonConformance -> ITSIssueTypeResult.NonConformance
        ITSIssueType.Other -> ITSIssueTypeResult.Other
        // A missing issue type is reported as uncategorized rather than crashing the request.
        ITSIssueType.Uncategorized, null -> ITSIssueTypeResult.Uncategorized
    }
190+
191+
/**
192+
* Some constants for Localization Quality Issue Type from the Internationalization Tag Set (ITS)
193+
* Version 2.0.
194+
*/
195+
@Serializable
196+
enum class ITSIssueTypeResult {
197+
@SerialName("terminology") Terminology,
198+
@SerialName("mistranslation") Mistranslation,
199+
@SerialName("omission") Omission,
200+
@SerialName("untranslated") Untranslated,
201+
@SerialName("addition") Addition,
202+
@SerialName("duplication") Duplication,
203+
@SerialName("inconsistency") Inconsistency,
204+
@SerialName("grammar") Grammar,
205+
@SerialName("legal") Legal,
206+
@SerialName("register") Register,
207+
@SerialName("locale-specific-content") LocaleSpecificContent,
208+
@SerialName("locale-violation") LocaleViolation,
209+
@SerialName("style") Style,
210+
@SerialName("characters") Characters,
211+
@SerialName("misspelling") Misspelling,
212+
@SerialName("typographical") Typographical,
213+
@SerialName("formatting") Formatting,
214+
@SerialName("inconsistent-entities") InconsistentEntities,
215+
@SerialName("numbers") Numbers,
216+
@SerialName("markup") Markup,
217+
@SerialName("pattern-problem") PatternProblem,
218+
@SerialName("whitespace") Whitespace,
219+
@SerialName("internationalization") Internationalization,
220+
@SerialName("length") Length,
221+
@SerialName("non-conformance") NonConformance,
222+
@SerialName("uncategorized") Uncategorized,
223+
@SerialName("other") Other,
224+
}

0 commit comments

Comments
 (0)