Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HU Setup + Numeral #79

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
HU Setup + numeral
  • Loading branch information
dubovinszky committed Aug 9, 2017
commit eb26053efe3850104ced71701e6f7e5d84cd745c
2 changes: 2 additions & 0 deletions Duckling/Dimensions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import qualified Duckling.Dimensions.FR as FRDimensions
import qualified Duckling.Dimensions.GA as GADimensions
import qualified Duckling.Dimensions.HE as HEDimensions
import qualified Duckling.Dimensions.HR as HRDimensions
import qualified Duckling.Dimensions.HU as HUDimensions
import qualified Duckling.Dimensions.ID as IDDimensions
import qualified Duckling.Dimensions.IT as ITDimensions
import qualified Duckling.Dimensions.JA as JADimensions
Expand Down Expand Up @@ -88,6 +89,7 @@ langDimensions FR = FRDimensions.allDimensions
langDimensions GA = GADimensions.allDimensions
langDimensions HE = HEDimensions.allDimensions
langDimensions HR = HRDimensions.allDimensions
langDimensions HU = HUDimensions.allDimensions
langDimensions ID = IDDimensions.allDimensions
langDimensions IT = ITDimensions.allDimensions
langDimensions JA = JADimensions.allDimensions
Expand Down
18 changes: 18 additions & 0 deletions Duckling/Dimensions/HU.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


module Duckling.Dimensions.HU
( allDimensions
) where

import Duckling.Dimensions.Types

-- | Dimensions enabled for the HU language. Only 'Numeral' is listed.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]
1 change: 1 addition & 0 deletions Duckling/Lang.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ data Lang
| GA
| HE
| HR
| HU
| ID
| IT
| JA
Expand Down
90 changes: 90 additions & 0 deletions Duckling/Numeral/HU/Corpus.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE OverloadedStrings #-}

module Duckling.Numeral.HU.Corpus
( corpus ) where

import Data.String
import Prelude

import Duckling.Lang
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types

-- | Numeral test corpus for HU: 'testContext' with its @lang@ field set to
-- 'HU', paired with every entry of 'allExamples'.
corpus :: Corpus
corpus = (huContext, allExamples)
  where
    huContext = testContext {lang = HU}

-- | Example phrases grouped by the numeral value they are expected to
-- resolve to. Each @(value, phrases)@ row is expanded with 'examples' and
-- the results are concatenated in the order listed.
allExamples :: [Example]
allExamples = concatMap expand
  [ (0, ["0", "nulla", "zéró"])
  , (1, ["1", "egy"])
  , (2, ["kettő"])
  , (3, ["három"])
  , (4, ["négy"])
  , (5, ["öt"])
  , (6, ["hat"])
  , (7, ["hét"])
  , (8, ["nyolc"])
  , (9, ["kilenc"])
  , (11, ["tizenegy"])
  , (15, ["tizenöt"])
  , (17, ["tizenhét"])
  , (22, ["huszonkettő"])
  , (26, ["huszonhat"])
  , (28, ["huszonnyolc"])
  , (10, ["tíz"])
  , (20, ["húsz"])
  , (50, ["ötven"])
  , (34, ["harmincnégy"])
  ]
  where
    -- Turn one table row into the examples for that value.
    expand (value, phrases) = examples (NumeralValue value) phrases
182 changes: 182 additions & 0 deletions Duckling/Numeral/HU/Rules.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}

module Duckling.Numeral.HU.Rules
( rules ) where

import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text

import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral

-- | Rule for numbers written with digits: captures a run of 1 to 18 decimal
-- digits, parses it with 'parseInt' and hands the result to 'integer'.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = fromDigits
  }
  where
    -- Only the first capture group of the leading regex token is used.
    fromDigits :: [Token] -> Maybe Token
    fromDigits (Token RegexMatch (GroupMatch (digits:_)):_) =
      parseInt digits >>= integer . toInteger
    fromDigits _ = Nothing

-- | Lookup table from the lowercase number words matched by 'ruleNumeral'
-- to their values 0 through 10. Two different words map to 0: @nulla@ is
-- added explicitly and the first entry of the zipped list is the other.
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList $
  ("nulla", 0) : zip
    [ "z\x00E9r\x00F3"  -- 0
    , "egy"             -- 1
    , "kett\x0151"      -- 2
    , "h\x00E1rom"      -- 3
    , "n\x00E9gy"       -- 4
    , "\x00F6t"         -- 5
    , "hat"             -- 6
    , "h\x00E9t"        -- 7
    , "nyolc"           -- 8
    , "kilenc"          -- 9
    , "t\x00EDz"        -- 10
    ]
    [0 ..]

-- | Rule for the number words covering 0 through 10. The matched word is
-- lowercased and resolved through 'ruleNumeralMap'; a word missing from the
-- map produces no token.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..10)"
  , pattern =
      [ regex "(nulla|z\x00E9r\x00F3|egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc|t\x00EDz)"
      ]
  , prod = wordToToken
  }
  where
    wordToToken :: [Token] -> Maybe Token
    wordToToken (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) ruleNumeralMap
    wordToToken _ = Nothing

-- | Lookup table from the lowercase words matched by 'ruleElevenToNineteen'
-- to their values. The words are listed in ascending order and numbered
-- consecutively starting at 11.
elevenToNineteenMap :: HashMap Text Integer
elevenToNineteenMap = HashMap.fromList $ zip
  [ "tizenegy"            -- 11
  , "tizenkett\x0151"     -- 12
  , "tizenh\x00E1rom"     -- 13
  , "tizenn\x00E9gy"      -- 14
  , "tizen\x00F6t"        -- 15
  , "tizenhat"            -- 16
  , "tizenh\x00E9t"       -- 17
  , "tizennyolc"          -- 18
  , "tizenkilenc"         -- 19
  ]
  [11 ..]

-- | Rule for the single-word numbers 11 through 19. The matched word is
-- lowercased and resolved through 'elevenToNineteenMap'.
ruleElevenToNineteen :: Rule
ruleElevenToNineteen = Rule
  { name = "number (11..19)"
  , pattern =
      [ regex "(tizenegy|tizenkett\x0151|tizenh\x00E1rom|tizenn\x00E9gy|tizen\x00F6t|tizenhat|tizenh\x00E9t|tizennyolc|tizenkilenc)"
      ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) -> do
        value <- HashMap.lookup (Text.toLower word) elevenToNineteenMap
        integer value
      _ -> Nothing
  }

-- | Lookup table from the lowercase words matched by
-- 'ruleTwentyoneToTwentynine' to their values. The words are listed in
-- ascending order and numbered consecutively starting at 21.
twentyoneToTwentynineMap :: HashMap Text Integer
twentyoneToTwentynineMap = HashMap.fromList $ zip
  [ "huszonegy"            -- 21
  , "huszonkett\x0151"     -- 22
  , "huszonh\x00E1rom"     -- 23
  , "huszonn\x00E9gy"      -- 24
  , "huszon\x00F6t"        -- 25
  , "huszonhat"            -- 26
  , "huszonh\x00E9t"       -- 27
  , "huszonnyolc"          -- 28
  , "huszonkilenc"         -- 29
  ]
  [21 ..]

-- | Rule for the single-word numbers 21 through 29. The matched word is
-- lowercased and resolved through 'twentyoneToTwentynineMap'.
ruleTwentyoneToTwentynine :: Rule
ruleTwentyoneToTwentynine = Rule
  { name = "number (21..29)"
  , pattern =
      [ regex "(huszonegy|huszonkett\x0151|huszonh\x00E1rom|huszonn\x00E9gy|huszon\x00F6t|huszonhat|huszonh\x00E9t|huszonnyolc|huszonkilenc)"
      ]
  , prod = twentiesToken
  }
  where
    twentiesToken :: [Token] -> Maybe Token
    twentiesToken (Token RegexMatch (GroupMatch (word:_)):_) =
      maybe Nothing integer $
        HashMap.lookup (Text.toLower word) twentyoneToTwentynineMap
    twentiesToken _ = Nothing

-- | Rule for the round tens 20, 30, ..., 90 written as a single word. The
-- matched word is lowercased and looked up in a local table; a word missing
-- from the table produces no token.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20,30..90)"
  , pattern =
      [ regex "(h\x00FAsz|harminc|negyven|\x00F6tven|hatvan|hetven|nyolcvan|kilencven)"
      ]
  , prod = tensToken
  }
  where
    tensToken :: [Token] -> Maybe Token
    tensToken (Token RegexMatch (GroupMatch (word:_)):_) =
      HashMap.lookup (Text.toLower word) tensValues >>= integer
    tensToken _ = Nothing

    -- One entry per alternative of the regex above.
    tensValues :: HashMap Text Integer
    tensValues = HashMap.fromList
      [ ("h\x00FAsz", 20)
      , ("harminc", 30)
      , ("negyven", 40)
      , ("\x00F6tven", 50)
      , ("hatvan", 60)
      , ("hetven", 70)
      , ("nyolcvan", 80)
      , ("kilencven", 90)
      ]

-- | Rule for compound words made of a tens word (30..90) immediately
-- followed by a unit word (1..9), with nothing in between. The two capture
-- groups are lowercased, resolved separately and summed. If either part is
-- unknown, no token is produced.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer ([3-9][1-9])"
  , pattern =
      [ regex "(harminc|negyven|\x00F6tven|hatvan|hetven|nyolcvan|kilencven)(egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc)"
      ]
  , prod = compose
  }
  where
    compose :: [Token] -> Maybe Token
    compose (Token RegexMatch (GroupMatch (tensWord:unitWord:_)):_) = do
      tens <- HashMap.lookup (Text.toLower tensWord) tensValues
      unit <- HashMap.lookup (Text.toLower unitWord) unitValues
      integer (tens + unit)
    compose _ = Nothing

    -- Values for the first capture group.
    tensValues :: HashMap Text Integer
    tensValues = HashMap.fromList
      [ ("harminc", 30)
      , ("negyven", 40)
      , ("\x00F6tven", 50)
      , ("hatvan", 60)
      , ("hetven", 70)
      , ("nyolcvan", 80)
      , ("kilencven", 90)
      ]

    -- Values for the second capture group.
    unitValues :: HashMap Text Integer
    unitValues = HashMap.fromList
      [ ("egy", 1)
      , ("kett\x0151", 2)
      , ("h\x00E1rom", 3)
      , ("n\x00E9gy", 4)
      , ("\x00F6t", 5)
      , ("hat", 6)
      , ("h\x00E9t", 7)
      , ("nyolc", 8)
      , ("kilenc", 9)
      ]

-- | Every HU numeral rule defined in this module: the digit-based rule
-- first, followed by the word-based rules.
rules :: [Rule]
rules = ruleIntegerNumeric : wordRules
  where
    wordRules =
      [ ruleNumeral
      , ruleElevenToNineteen
      , ruleTwentyoneToTwentynine
      , ruleTens
      , ruleCompositeTens
      ]
2 changes: 2 additions & 0 deletions Duckling/Ranking/Classifiers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import qualified Duckling.Ranking.Classifiers.FR as FRClassifiers
import qualified Duckling.Ranking.Classifiers.GA as GAClassifiers
import qualified Duckling.Ranking.Classifiers.HE as HEClassifiers
import qualified Duckling.Ranking.Classifiers.HR as HRClassifiers
import qualified Duckling.Ranking.Classifiers.HU as HUClassifiers
import qualified Duckling.Ranking.Classifiers.ID as IDClassifiers
import qualified Duckling.Ranking.Classifiers.IT as ITClassifiers
import qualified Duckling.Ranking.Classifiers.JA as JAClassifiers
Expand Down Expand Up @@ -52,6 +53,7 @@ classifiers FR = FRClassifiers.classifiers
classifiers GA = GAClassifiers.classifiers
classifiers HE = HEClassifiers.classifiers
classifiers HR = HRClassifiers.classifiers
classifiers HU = HUClassifiers.classifiers
classifiers ID = IDClassifiers.classifiers
classifiers IT = ITClassifiers.classifiers
classifiers JA = JAClassifiers.classifiers
Expand Down
22 changes: 22 additions & 0 deletions Duckling/Ranking/Classifiers/HU.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.

-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.HU (classifiers) where
import Prelude
import Duckling.Ranking.Types
import qualified Data.HashMap.Strict as HashMap
import Data.String

-- | Ranking classifiers for HU. As committed the table is empty: it is built
-- from an empty association list.
-- NOTE(review): this file is marked as generated by regenClassifiers;
-- presumably it should be regenerated rather than edited by hand — confirm.
classifiers :: Classifiers
classifiers = HashMap.fromList []
2 changes: 2 additions & 0 deletions Duckling/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import qualified Duckling.Rules.FR as FRRules
import qualified Duckling.Rules.GA as GARules
import qualified Duckling.Rules.HE as HERules
import qualified Duckling.Rules.HR as HRRules
import qualified Duckling.Rules.HU as HURules
import qualified Duckling.Rules.ID as IDRules
import qualified Duckling.Rules.IT as ITRules
import qualified Duckling.Rules.JA as JARules
Expand Down Expand Up @@ -79,6 +80,7 @@ langRules FR = FRRules.rules
langRules GA = GARules.rules
langRules HE = HERules.rules
langRules HR = HRRules.rules
langRules HU = HURules.rules
langRules ID = IDRules.rules
langRules IT = ITRules.rules
langRules JA = JARules.rules
Expand Down
Loading