Skip to content

Commit f132b9b

Browse files
committed
Fixes index-out-of-bounds error
1 parent 699bed9 commit f132b9b

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

src/cadmium/language_detector/language.cr

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Cadmium
22
module Language
33
struct IsoCode3To1
4-
getter codes : Hash(String, String) = {
4+
getter codes : Hash(String, String) = { # Symbolize keys + values
55
"aae" => "sq",
66
"aao" => "ar",
77
"aar" => "aa",
@@ -420,7 +420,7 @@ module Cadmium
420420
class LanguageData
421421
# Reference: http://www.unicode.org/Public/UNIDATA/Blocks.txt
422422
# Removed UTF-16 characters because Crystal's PCRE regex implementation doesn't support them
423-
getter expressions : Hash(String, Regex) = {
423+
getter expressions : Hash(String, Regex) = { # Symbolize keys
424424
"cmn" => /[\x{2E80}-\x{2E99}\x{2E9B}-\x{2EF3}\x{2F00}-\x{2FD5}\x{3005}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303B}\x{3400}-\x{4DB5}\x{4E00}-\x{9FCC}\x{F900}-\x{FA6D}\x{FA70}-\x{FAD9}]/im,
425425
"Latin" => /[A-Za-z\xAA\xBA\xC0-\xD6\xD8-\xF6\xF8-\x{02B8}\x{02E0}-\x{02E4}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1E00}-\x{1EFF}\x{2071}\x{207F}\x{2090}-\x{209C}\x{212A}\x{212B}\x{2132}\x{214E}\x{2160}-\x{2188}\x{2C60}-\x{2C7F}\x{A722}-\x{A787}\x{A78B}-\x{A78E}\x{A790}-\x{A7AD}\x{A7B0}\x{A7B1}\x{A7F7}-\x{A7FF}\x{AB30}-\x{AB5A}\x{AB5C}-\x{AB5F}\x{AB64}\x{FB00}-\x{FB06}\x{FF21}-\x{FF3A}\x{FF41}-\x{FF5A}]/,
426426
"Cyrillic" => /[\x{0400}-\x{0484}\x{0487}-\x{052F}\x{1D2B}\x{1D78}\x{2DE0}-\x{2DFF}\x{A640}-\x{A69D}\x{A69F}]/,
@@ -459,7 +459,7 @@ module Cadmium
459459
"blt" => /[\x{AA80}-\x{AAC2}\x{AADB}-\x{AADF}]/,
460460
}
461461
DATA_FILE = "#{__DIR__}/../data/data.json"
462-
getter trigrams : Hash(String, Hash(String, String)) # Should be namedtuple : will be when crystal 0.31 is out (from_json issue fixed in master #8109)
462+
getter trigrams : Hash(String, Hash(String, String)) # Symbolize keys
463463

464464
def initialize # Workaround for https://github.com/crystal-lang/crystal/issues/8163
465465
@trigrams = Hash(String, Hash(String, String)).from_json({{ read_file(DATA_FILE) }})

src/cadmium/language_detector/language_detector.cr

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ module Cadmium
77
@@lang_data = LanguageData.new
88
@@trigrams_data : Hash(String, Hash(String, String)) = @@lang_data.trigrams
99
@@expressions : Hash(String, Regex) = @@lang_data.expressions
10-
@@languages = Hash(String, Array(String)).new
11-
@@iso_hash : Hash(String, String) = IsoCode3To1.new.codes
10+
@@languages = Hash(String, Array(String)).new # Symbolize keys
11+
@@iso_hash : Hash(String, String) = IsoCode3To1.new.codes # Symbolize keys + values
1212

1313
def initialize
1414
@@trigrams_data.values.each do |languages|
@@ -42,8 +42,8 @@ module Cadmium
4242
end
4343

4444
private def normalize(text : String, distances : Hash(String, Int32)) : Hash(String, Float64)
45-
min = distances.values[1]
46-
max = text.size * 300 - min
45+
min = !distances.values[1..].empty? ? distances.values[1] : 1
46+
max = (text.size + 1) * 300 - min
4747
distances_float = Hash(String, Float64).new
4848
distances.each do |string, distance|
4949
distances_float[string] = (1 - (distance - min) / max).to_f || 0.0

0 commit comments

Comments
 (0)