Skip to content

Commit

Permalink
Add low accuracy mode (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
pemistahl committed Nov 14, 2022
1 parent 64a3654 commit 8508880
Show file tree
Hide file tree
Showing 207 changed files with 1,812 additions and 513 deletions.
24 changes: 23 additions & 1 deletion builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ type LanguageDetectorBuilder interface {
// dependent on the length of the input text. The longer the input
// text, the larger the distance between the languages. So if you
// want to classify very short text phrases, do not set the minimum
// relative distance too high. Otherwise you will get most results
// relative distance too high. Otherwise, you will get most results
// returned as Unknown which is the return value for cases
// where language detection is not reliably possible.
//
Expand All @@ -107,6 +107,20 @@ type LanguageDetectorBuilder interface {
// method allows switching between these two loading modes.
WithPreloadedLanguageModels() LanguageDetectorBuilder

// WithLowAccuracyMode disables the high accuracy mode in order to save
// memory and increase performance.
//
// By default, Lingua's high detection accuracy comes at the cost of
// loading large language models into memory which might not be feasible
// for systems running low on resources.
//
// This method disables the high accuracy mode so that only a small subset
// of language models is loaded into memory. The downside of this approach
// is that detection accuracy for short texts consisting of fewer than 120
// characters will drop significantly. However, detection accuracy for texts
// which are longer than 120 characters will remain mostly unaffected.
WithLowAccuracyMode() LanguageDetectorBuilder

// Build creates and returns the configured instance of LanguageDetector.
Build() LanguageDetector
getLanguages() []Language
Expand All @@ -117,6 +131,7 @@ type languageDetectorBuilder struct {
languages []Language
minimumRelativeDistance float64
isEveryLanguageModelPreloaded bool
isLowAccuracyModeEnabled bool
}

// NewLanguageDetectorBuilder returns a new instance that implements the
Expand Down Expand Up @@ -225,11 +240,17 @@ func (builder *languageDetectorBuilder) WithPreloadedLanguageModels() LanguageDe
return builder
}

// WithLowAccuracyMode turns on the builder's low accuracy flag and returns
// the same builder so that further configuration calls can be chained.
// The flag is handed to newLanguageDetector when Build is called.
func (builder *languageDetectorBuilder) WithLowAccuracyMode() LanguageDetectorBuilder {
builder.isLowAccuracyModeEnabled = true
return builder
}

// Build assembles a LanguageDetector from the builder's current settings:
// the selected languages, the minimum relative distance, the preload flag
// and the low accuracy flag are forwarded, in that order, to
// newLanguageDetector.
func (builder *languageDetectorBuilder) Build() LanguageDetector {
	detector := newLanguageDetector(
		builder.languages,
		builder.minimumRelativeDistance,
		builder.isEveryLanguageModelPreloaded,
		builder.isLowAccuracyModeEnabled,
	)
	return detector
}

Expand All @@ -245,6 +266,7 @@ func (builder *languageDetectorBuilder) from(languages []Language) LanguageDetec
builder.languages = removeDuplicateLanguages(languages)
builder.minimumRelativeDistance = 0.0
builder.isEveryLanguageModelPreloaded = false
builder.isLowAccuracyModeEnabled = false
return builder
}

Expand Down
152 changes: 76 additions & 76 deletions cmd/accuracy-reports/aggregated-accuracy-values.csv
Original file line number Diff line number Diff line change
@@ -1,76 +1,76 @@
language,average-whatlang,single-words-whatlang,word-pairs-whatlang,sentences-whatlang,average-cld3,single-words-cld3,word-pairs-cld3,sentences-cld3,average-lingua,single-words-lingua,word-pairs-lingua,sentences-lingua
Afrikaans,51,21,39,92,55,22,46,98,79,58,81,97
Albanian,NaN,NaN,NaN,NaN,55,18,48,98,88,69,95,100
Arabic,89,77,91,99,90,79,92,100,98,96,99,100
Armenian,NaN,NaN,NaN,NaN,99,100,100,97,100,100,100,100
Azerbaijani,64,45,58,91,81,62,82,99,88,77,88,99
Basque,NaN,NaN,NaN,NaN,62,33,62,92,84,71,87,93
Belarusian,81,64,80,98,84,67,86,100,97,91,99,100
Bengali,100,100,100,100,99,98,99,99,100,100,100,100
Bokmal,34,15,29,60,NaN,NaN,NaN,NaN,58,39,59,75
Bosnian,NaN,NaN,NaN,NaN,33,19,28,52,35,29,35,40
Bulgarian,61,37,57,89,70,45,66,98,87,70,91,99
Catalan,NaN,NaN,NaN,NaN,48,19,42,84,70,51,74,86
Chinese,100,100,100,100,92,92,83,100,100,100,100,100
Croatian,55,28,44,91,42,26,42,58,72,53,74,89
Czech,50,31,46,71,64,39,65,88,79,65,82,90
Danish,47,24,38,79,58,26,54,95,81,61,84,98
Dutch,47,22,36,82,58,29,47,97,77,55,81,96
English,49,17,35,94,54,22,44,97,81,55,89,99
Esperanto,52,25,45,88,57,22,51,98,82,67,80,97
Estonian,61,36,53,94,70,41,69,99,92,80,96,100
Finnish,71,45,70,98,80,58,84,99,96,90,98,100
French,65,37,59,97,55,22,49,94,89,74,94,99
Ganda,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,91,79,95,100
Georgian,100,100,100,100,98,99,100,96,100,100,100,100
German,65,38,60,97,66,40,62,98,89,74,94,100
Greek,100,100,100,100,100,100,100,100,100,100,100,100
Gujarati,100,100,100,100,100,99,100,100,100,100,100,100
Hebrew,90,76,94,99,NaN,NaN,NaN,NaN,100,100,100,100
Hindi,52,27,40,88,58,34,45,95,73,61,64,93
Hungarian,62,38,53,95,76,53,76,99,95,86,98,100
Icelandic,NaN,NaN,NaN,NaN,71,42,70,99,91,79,95,100
Indonesian,67,39,66,95,46,26,45,66,60,39,61,81
Irish,NaN,NaN,NaN,NaN,67,42,66,94,91,82,94,96
Italian,56,25,47,96,62,31,57,98,87,69,92,100
Japanese,99,100,100,97,98,97,96,100,100,100,100,100
Kazakh,NaN,NaN,NaN,NaN,82,62,83,99,90,78,94,99
Korean,100,100,100,100,99,100,100,98,100,100,100,100
Latin,NaN,NaN,NaN,NaN,62,44,58,83,87,72,93,97
Latvian,59,36,54,87,75,51,77,98,93,84,96,98
Lithuanian,62,38,56,92,72,42,75,99,94,86,96,100
Macedonian,62,39,55,94,60,30,54,97,83,66,86,98
Malay,NaN,NaN,NaN,NaN,22,11,22,34,31,26,38,30
Maori,NaN,NaN,NaN,NaN,52,22,43,91,91,82,92,99
Marathi,73,52,74,93,84,69,84,98,85,74,85,96
Mongolian,NaN,NaN,NaN,NaN,83,63,87,99,97,93,98,99
Nynorsk,34,10,24,69,NaN,NaN,NaN,NaN,66,41,66,90
Persian,70,46,66,99,76,57,70,99,90,78,94,99
Polish,66,45,59,94,77,51,80,99,94,85,97,100
Portuguese,57,27,48,96,53,21,40,97,81,59,85,98
Punjabi,100,100,100,100,100,99,100,100,100,100,100,100
Romanian,59,35,52,90,53,24,48,88,86,69,91,99
Russian,53,40,52,68,71,48,72,93,90,76,95,98
Serbian,57,34,51,86,78,63,75,95,87,74,89,99
Shona,68,44,65,95,76,51,79,99,91,78,96,100
Slovak,NaN,NaN,NaN,NaN,63,32,61,96,83,63,89,98
Slovene,48,25,38,81,63,29,60,99,82,61,87,98
Somali,68,38,66,99,69,38,70,100,92,82,96,100
Sotho,NaN,NaN,NaN,NaN,49,15,33,98,85,67,90,99
Spanish,48,19,33,93,48,16,32,96,70,44,69,97
Swahili,NaN,NaN,NaN,NaN,57,25,49,98,81,60,84,98
Swedish,49,24,40,83,61,30,56,96,84,64,88,99
Tagalog,52,23,43,90,NaN,NaN,NaN,NaN,78,52,83,99
Tamil,100,100,100,100,100,100,100,99,100,100,100,100
Telugu,100,100,100,100,99,99,100,99,100,100,100,100
Thai,100,100,100,99,99,100,100,98,99,100,100,98
Tsonga,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,84,66,89,98
Tswana,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,84,65,88,99
Turkish,54,26,44,92,69,41,70,97,94,84,97,100
Ukrainian,72,53,71,93,81,62,83,98,92,84,97,95
Urdu,57,31,46,94,61,39,53,92,86,72,88,97
Vietnamese,73,36,85,97,66,26,74,99,84,78,87,87
Welsh,NaN,NaN,NaN,NaN,69,43,66,98,91,78,96,99
Xhosa,NaN,NaN,NaN,NaN,66,40,65,92,82,64,85,98
Yoruba,22,11,14,41,15,5,11,28,74,50,77,96
Zulu,70,44,68,98,63,35,63,92,81,62,83,97
language,average-whatlang,single-words-whatlang,word-pairs-whatlang,sentences-whatlang,average-cld3,single-words-cld3,word-pairs-cld3,sentences-cld3,average-lingua-low,single-words-lingua-low,word-pairs-lingua-low,sentences-lingua-low,average-lingua-high,single-words-lingua-high,word-pairs-lingua-high,sentences-lingua-high
Afrikaans,51,21,39,92,55,22,46,98,64,38,62,93,79,58,81,97
Albanian,NaN,NaN,NaN,NaN,55,18,48,98,80,54,86,99,88,69,95,100
Arabic,89,77,91,99,90,79,92,100,94,88,96,99,98,96,99,100
Armenian,NaN,NaN,NaN,NaN,99,100,100,97,100,100,100,100,100,100,100,100
Azerbaijani,65,45,58,91,81,62,82,99,82,71,78,96,90,77,92,99
Basque,NaN,NaN,NaN,NaN,62,33,62,92,74,56,76,91,84,71,87,93
Belarusian,81,64,80,98,84,67,86,100,92,80,95,100,97,92,99,100
Bengali,100,100,100,100,99,98,99,99,100,100,100,100,100,100,100,100
Bokmal,34,15,29,60,NaN,NaN,NaN,NaN,49,27,47,74,58,39,59,75
Bosnian,NaN,NaN,NaN,NaN,33,19,28,52,29,23,29,36,35,29,35,40
Bulgarian,61,37,57,89,70,45,66,98,78,56,81,96,87,70,91,99
Catalan,NaN,NaN,NaN,NaN,48,19,42,84,58,33,60,81,70,51,74,86
Chinese,100,100,100,100,92,92,83,100,100,100,100,100,100,100,100,100
Croatian,55,28,44,91,42,26,42,58,60,36,57,85,72,53,74,90
Czech,50,31,46,71,64,39,65,88,71,54,72,87,80,66,84,91
Danish,47,24,38,79,58,26,54,95,70,45,70,95,81,61,84,98
Dutch,47,22,36,82,58,29,47,97,64,36,61,94,77,55,81,96
English,49,17,36,94,54,22,44,97,62,29,62,96,81,55,89,99
Esperanto,52,25,45,88,57,22,51,98,66,44,61,92,84,67,85,98
Estonian,61,36,53,94,70,41,69,99,83,62,88,99,92,80,96,100
Finnish,71,45,70,98,80,58,84,99,91,77,95,100,96,90,98,100
French,64,37,59,97,55,22,49,94,77,52,83,97,89,74,94,99
Ganda,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,84,65,87,100,91,79,95,100
Georgian,100,100,100,100,98,99,100,96,100,100,100,100,100,100,100,100
German,65,38,60,97,66,40,62,98,80,57,84,99,89,74,94,100
Greek,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
Gujarati,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100
Hebrew,90,76,94,99,NaN,NaN,NaN,NaN,100,100,100,100,100,100,100,100
Hindi,52,27,40,88,58,34,45,95,33,11,20,67,73,61,64,93
Hungarian,62,38,53,95,76,53,76,99,90,77,94,100,95,87,98,100
Icelandic,NaN,NaN,NaN,NaN,71,42,70,99,88,72,92,99,93,83,97,100
Indonesian,67,39,66,95,46,26,45,66,48,25,46,72,60,39,61,81
Irish,NaN,NaN,NaN,NaN,67,42,66,94,85,70,90,95,91,82,94,96
Italian,56,25,47,96,62,31,57,98,71,42,74,98,87,69,92,100
Japanese,99,100,100,97,98,97,96,100,100,100,100,100,100,100,100,100
Kazakh,NaN,NaN,NaN,NaN,82,62,83,99,90,78,93,99,92,80,96,99
Korean,100,100,100,100,99,100,100,98,100,100,100,100,100,100,100,100
Latin,NaN,NaN,NaN,NaN,62,44,58,83,73,49,76,93,87,72,93,97
Latvian,59,36,54,87,75,51,77,98,87,75,90,97,93,85,97,99
Lithuanian,62,38,56,92,72,42,75,99,87,76,89,98,95,86,98,100
Macedonian,62,39,55,94,60,30,54,97,72,52,70,95,84,66,86,99
Malay,NaN,NaN,NaN,NaN,22,11,22,34,31,22,36,36,31,26,38,30
Maori,NaN,NaN,NaN,NaN,52,22,43,91,82,62,87,98,91,82,92,99
Marathi,73,52,74,93,84,69,84,98,41,20,30,72,85,74,85,96
Mongolian,NaN,NaN,NaN,NaN,83,63,87,99,96,89,98,99,97,93,99,99
Nynorsk,34,10,24,69,NaN,NaN,NaN,NaN,52,25,49,81,66,41,66,90
Persian,70,46,66,99,76,57,70,99,80,62,80,98,90,78,94,100
Polish,66,45,59,94,77,51,80,99,90,77,93,99,95,85,98,100
Portuguese,57,26,48,96,53,21,40,97,69,42,70,95,81,59,85,98
Punjabi,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100
Romanian,59,34,52,90,53,24,48,88,72,49,74,94,87,69,92,99
Russian,53,40,52,68,71,48,72,93,78,59,84,92,90,76,95,98
Serbian,57,34,51,86,78,63,75,95,78,62,80,91,88,74,90,99
Shona,68,44,65,95,76,51,79,99,81,56,86,100,91,78,96,100
Slovak,NaN,NaN,NaN,NaN,63,32,61,96,75,49,78,97,84,64,90,99
Slovene,48,25,38,81,63,29,60,99,67,39,68,93,82,61,87,99
Somali,68,38,66,99,69,38,70,100,85,64,90,100,92,82,96,100
Sotho,NaN,NaN,NaN,NaN,49,15,33,98,72,43,75,97,85,67,90,99
Spanish,48,19,33,93,48,16,32,96,56,26,49,94,70,44,69,97
Swahili,NaN,NaN,NaN,NaN,57,25,49,98,70,43,68,97,81,60,84,98
Swedish,49,24,39,83,61,30,56,96,72,46,76,95,84,64,88,99
Tagalog,52,23,43,90,NaN,NaN,NaN,NaN,66,36,67,96,78,52,83,99
Tamil,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100
Telugu,100,100,100,100,99,99,100,99,100,100,100,100,100,100,100,100
Thai,100,100,100,99,99,100,100,98,99,100,100,98,99,100,100,98
Tsonga,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,72,46,73,97,84,66,89,98
Tswana,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,71,44,73,96,84,65,88,99
Turkish,54,26,44,92,69,41,70,97,87,71,91,99,94,84,98,100
Ukrainian,72,53,71,93,81,62,83,98,86,75,92,93,92,84,97,95
Urdu,57,31,46,94,61,39,53,92,80,65,78,96,91,80,94,98
Vietnamese,73,36,85,97,66,26,74,99,87,76,87,98,91,79,94,99
Welsh,NaN,NaN,NaN,NaN,69,43,66,98,82,61,87,99,91,78,96,99
Xhosa,NaN,NaN,NaN,NaN,66,40,65,92,69,45,67,94,82,64,85,98
Yoruba,22,11,14,41,15,5,11,28,62,33,61,93,75,50,77,97
Zulu,70,44,68,98,63,35,63,92,70,45,72,94,81,62,83,97
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
##### Azerbaijani #####

>>> Accuracy on average: 87.93%
>>> Accuracy on average: 89.57%

>> Detection of 1000 single words (average length: 8 chars)
Accuracy: 77.20%
Erroneously classified as Turkish: 8.10%, Basque: 1.00%, Latin: 0.90%, Albanian: 0.80%, English: 0.70%, Tagalog: 0.70%, Esperanto: 0.60%, Lithuanian: 0.60%, Zulu: 0.60%, Danish: 0.50%, Somali: 0.50%, Swahili: 0.50%, Xhosa: 0.50%, Estonian: 0.40%, Ganda: 0.40%, Malay: 0.40%, Tsonga: 0.40%, Tswana: 0.40%, Yoruba: 0.40%, Bosnian: 0.30%, Italian: 0.30%, Nynorsk: 0.30%, Portuguese: 0.30%, Romanian: 0.30%, Spanish: 0.30%, Swedish: 0.30%, Dutch: 0.20%, Finnish: 0.20%, Indonesian: 0.20%, Shona: 0.20%, Slovene: 0.20%, Welsh: 0.20%, Afrikaans: 0.10%, Bokmal: 0.10%, Croatian: 0.10%, French: 0.10%, German: 0.10%, Hungarian: 0.10%, Icelandic: 0.10%, Irish: 0.10%, Latvian: 0.10%, Maori: 0.10%, Sotho: 0.10%
Accuracy: 77.40%
Erroneously classified as Turkish: 8.10%, Latin: 0.90%, Albanian: 0.80%, Basque: 0.80%, English: 0.70%, Tagalog: 0.70%, Esperanto: 0.60%, Lithuanian: 0.60%, Zulu: 0.60%, Danish: 0.50%, Somali: 0.50%, Swahili: 0.50%, Xhosa: 0.50%, Estonian: 0.40%, Ganda: 0.40%, Malay: 0.40%, Tsonga: 0.40%, Tswana: 0.40%, Yoruba: 0.40%, Bosnian: 0.30%, Italian: 0.30%, Nynorsk: 0.30%, Portuguese: 0.30%, Romanian: 0.30%, Spanish: 0.30%, Swedish: 0.30%, Dutch: 0.20%, Finnish: 0.20%, Indonesian: 0.20%, Shona: 0.20%, Slovene: 0.20%, Welsh: 0.20%, Afrikaans: 0.10%, Bokmal: 0.10%, Croatian: 0.10%, French: 0.10%, German: 0.10%, Hungarian: 0.10%, Icelandic: 0.10%, Irish: 0.10%, Latvian: 0.10%, Maori: 0.10%, Sotho: 0.10%

>> Detection of 1000 word pairs (average length: 16 chars)
Accuracy: 87.90%
Erroneously classified as Turkish: 7.80%, Basque: 0.50%, Swahili: 0.40%, Italian: 0.30%, Spanish: 0.30%, Albanian: 0.20%, Esperanto: 0.20%, Indonesian: 0.20%, Latin: 0.20%, Malay: 0.20%, Shona: 0.20%, Somali: 0.20%, Bosnian: 0.10%, Danish: 0.10%, Dutch: 0.10%, Estonian: 0.10%, Finnish: 0.10%, German: 0.10%, Latvian: 0.10%, Polish: 0.10%, Swedish: 0.10%, Tagalog: 0.10%, Tswana: 0.10%, Xhosa: 0.10%, Yoruba: 0.10%, Zulu: 0.10%
Accuracy: 92.30%
Erroneously classified as Turkish: 4.70%, Italian: 0.30%, Albanian: 0.20%, Basque: 0.20%, Esperanto: 0.20%, Indonesian: 0.20%, Latin: 0.20%, Shona: 0.20%, Swahili: 0.20%, Bosnian: 0.10%, Danish: 0.10%, Dutch: 0.10%, German: 0.10%, Latvian: 0.10%, Malay: 0.10%, Polish: 0.10%, Somali: 0.10%, Swedish: 0.10%, Tagalog: 0.10%, Tswana: 0.10%, Xhosa: 0.10%, Zulu: 0.10%

>> Detection of 1000 sentences (average length: 107 chars)
Accuracy: 98.70%
Erroneously classified as Turkish: 1.00%, Afrikaans: 0.10%, Sotho: 0.10%, Tagalog: 0.10%
Accuracy: 99.00%
Erroneously classified as Turkish: 0.80%, Sotho: 0.10%, Tagalog: 0.10%

File renamed without changes.
16 changes: 16 additions & 0 deletions cmd/accuracy-reports/lingua-high-accuracy/Belarusian.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
##### Belarusian #####

>>> Accuracy on average: 96.87%

>> Detection of 1000 single words (average length: 8 chars)
Accuracy: 91.50%
Erroneously classified as Russian: 2.90%, Ukrainian: 2.10%, Serbian: 1.00%, Kazakh: 0.90%, Bulgarian: 0.60%, Macedonian: 0.60%, Mongolian: 0.40%

>> Detection of 1000 word pairs (average length: 17 chars)
Accuracy: 99.20%
Erroneously classified as Russian: 0.50%, Bulgarian: 0.10%, Macedonian: 0.10%, Ukrainian: 0.10%

>> Detection of 1000 sentences (average length: 105 chars)
Accuracy: 99.90%
Erroneously classified as Kazakh: 0.10%

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ Erroneously classified as Nynorsk: 23.70%, Danish: 12.70%, Swedish: 1.40%, Germa

>> Detection of 1000 sentences (average length: 98 chars)
Accuracy: 75.40%
Erroneously classified as Nynorsk: 22.00%, Danish: 2.20%, Afrikaans: 0.10%, Dutch: 0.10%, English: 0.10%, Swedish: 0.10%
Erroneously classified as Nynorsk: 22.10%, Danish: 2.20%, Dutch: 0.10%, English: 0.10%, Swedish: 0.10%

File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
##### Catalan #####

>>> Accuracy on average: 70.03%
>>> Accuracy on average: 70.17%

>> Detection of 1000 single words (average length: 8 chars)
Accuracy: 50.60%
Erroneously classified as Spanish: 7.80%, Portuguese: 6.20%, French: 5.70%, Italian: 3.50%, Latin: 3.50%, English: 2.70%, Romanian: 2.50%, Basque: 2.10%, Esperanto: 1.90%, Yoruba: 1.10%, Tswana: 1.00%, Shona: 0.80%, Dutch: 0.70%, Somali: 0.70%, Icelandic: 0.60%, Sotho: 0.60%, Swahili: 0.60%, Afrikaans: 0.50%, Indonesian: 0.50%, Malay: 0.50%, Swedish: 0.50%, Turkish: 0.50%, Albanian: 0.40%, German: 0.40%, Nynorsk: 0.40%, Bokmal: 0.30%, Croatian: 0.30%, Finnish: 0.30%, Zulu: 0.30%, Bosnian: 0.20%, Danish: 0.20%, Estonian: 0.20%, Hungarian: 0.20%, Polish: 0.20%, Tagalog: 0.20%, Tsonga: 0.20%, Vietnamese: 0.20%, Welsh: 0.20%, Xhosa: 0.20%, Irish: 0.10%, Latvian: 0.10%, Lithuanian: 0.10%, Maori: 0.10%, Slovene: 0.10%

>> Detection of 1000 word pairs (average length: 16 chars)
Accuracy: 73.80%
Erroneously classified as Spanish: 8.90%, Portuguese: 3.70%, French: 2.90%, Yoruba: 2.20%, Italian: 1.90%, Latin: 1.90%, English: 1.60%, Romanian: 0.40%, Swahili: 0.30%, Dutch: 0.20%, Esperanto: 0.20%, Irish: 0.20%, Slovak: 0.20%, Tagalog: 0.20%, Welsh: 0.20%, Afrikaans: 0.10%, Albanian: 0.10%, Basque: 0.10%, Finnish: 0.10%, German: 0.10%, Hungarian: 0.10%, Lithuanian: 0.10%, Nynorsk: 0.10%, Sotho: 0.10%, Tsonga: 0.10%, Tswana: 0.10%, Vietnamese: 0.10%
Accuracy: 73.90%
Erroneously classified as Spanish: 8.90%, Portuguese: 3.70%, French: 2.90%, Yoruba: 2.20%, Italian: 1.90%, Latin: 1.90%, English: 1.60%, Romanian: 0.40%, Swahili: 0.30%, Basque: 0.20%, Dutch: 0.20%, Esperanto: 0.20%, Irish: 0.20%, Slovak: 0.20%, Tagalog: 0.20%, Welsh: 0.20%, Afrikaans: 0.10%, Albanian: 0.10%, Finnish: 0.10%, German: 0.10%, Hungarian: 0.10%, Lithuanian: 0.10%, Nynorsk: 0.10%, Tsonga: 0.10%, Vietnamese: 0.10%

>> Detection of 1000 sentences (average length: 103 chars)
Accuracy: 85.70%
Erroneously classified as Spanish: 6.40%, English: 1.70%, Latin: 1.40%, French: 1.00%, Yoruba: 0.70%, Tagalog: 0.50%, Basque: 0.30%, Italian: 0.30%, Portuguese: 0.30%, Tsonga: 0.30%, Romanian: 0.20%, Swahili: 0.20%, Vietnamese: 0.20%, Danish: 0.10%, Esperanto: 0.10%, Finnish: 0.10%, German: 0.10%, Malay: 0.10%, Slovene: 0.10%, Tswana: 0.10%, Xhosa: 0.10%
Accuracy: 86.00%
Erroneously classified as Spanish: 6.60%, English: 1.80%, Latin: 1.40%, French: 1.00%, Yoruba: 0.60%, Portuguese: 0.40%, Basque: 0.30%, Italian: 0.30%, Romanian: 0.20%, Swahili: 0.20%, Tagalog: 0.20%, Vietnamese: 0.20%, Danish: 0.10%, Esperanto: 0.10%, Finnish: 0.10%, German: 0.10%, Malay: 0.10%, Slovene: 0.10%, Tswana: 0.10%, Xhosa: 0.10%

File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
##### Croatian #####

>>> Accuracy on average: 72.33%
>>> Accuracy on average: 72.40%

>> Detection of 1000 single words (average length: 8 chars)
Accuracy: 53.40%
Expand All @@ -11,6 +11,6 @@ Accuracy: 74.30%
Erroneously classified as Bosnian: 19.00%, Slovene: 3.50%, Slovak: 0.70%, English: 0.50%, Basque: 0.20%, Latin: 0.20%, Lithuanian: 0.20%, Polish: 0.20%, Swahili: 0.20%, Turkish: 0.20%, Afrikaans: 0.10%, Albanian: 0.10%, Czech: 0.10%, Esperanto: 0.10%, Italian: 0.10%, Nynorsk: 0.10%, Portuguese: 0.10%, Romanian: 0.10%

>> Detection of 1000 sentences (average length: 127 chars)
Accuracy: 89.30%
Erroneously classified as Bosnian: 10.20%, Latin: 0.10%, Shona: 0.10%, Somali: 0.10%, Swahili: 0.10%, Tsonga: 0.10%
Accuracy: 89.50%
Erroneously classified as Bosnian: 10.30%, Latin: 0.10%, Shona: 0.10%

Loading

0 comments on commit 8508880

Please sign in to comment.