@@ -58,43 +58,93 @@ BOOST_AUTO_TEST_CASE(testPartOfSpeech) {
5858 dict.partOfSpeech (" a" ));
5959}
6060
// Test case for the two weighting functors whose expected result in this test
// depends only on the part of speech passed in. In this diff the case is
// renamed from testWeightingFunctors to testSimpleWeightingFunctors; the
// removed ('-') lines wrapped each expected value in size_t(...), the added
// ('+') lines use plain integer literals and additionally exercise reset(),
// minMatchingWeight() and maxMatchingWeight().
61- BOOST_AUTO_TEST_CASE (testWeightingFunctors ) {
61+ BOOST_AUTO_TEST_CASE (testSimpleWeightingFunctors ) {
// Part 1: TWeightAll2 - expected weight is 0 for E_NotInDictionary and 2 for
// every other part of speech checked below.
6262 {
6363 ml::core::CWordDictionary::TWeightAll2 weighter;
6464
65- BOOST_REQUIRE_EQUAL (size_t (0 ), weighter (ml::core::CWordDictionary::E_NotInDictionary));
66- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_UnknownPart));
67- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Noun));
68- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Plural));
69- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Verb));
70- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Adjective));
71- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Adverb));
72- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Conjunction));
73- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Preposition));
74- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Interjection));
75- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Pronoun));
76- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_DefiniteArticle));
77- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_IndefiniteArticle));
65+ BOOST_REQUIRE_EQUAL (0 , weighter (ml::core::CWordDictionary::E_NotInDictionary));
66+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_UnknownPart));
67+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Noun));
68+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Plural));
69+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Verb));
70+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adjective));
// reset() is called part-way through the sequence; the expectations that
// follow are the same as before it, i.e. the test requires that reset() has
// no observable effect on this functor.
71+ weighter.reset (); // should make no difference
72+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adverb));
73+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Conjunction));
74+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Preposition));
75+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Interjection));
76+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Pronoun));
77+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_DefiniteArticle));
78+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_IndefiniteArticle));
79+ // Any given token always gives the same weight, so min/max matching
80+ // should always be the same as the original
// Checked for every weight from 1 to 9 inclusive.
81+ for (std::size_t weight = 1 ; weight < 10 ; ++weight) {
82+ BOOST_REQUIRE_EQUAL (weight, weighter.minMatchingWeight (weight));
83+ BOOST_REQUIRE_EQUAL (weight, weighter.maxMatchingWeight (weight));
84+ }
7885 }
// Part 2: TWeightVerbs5Other2 - same expectations as part 1 except that
// E_Verb is expected to give 5 instead of 2.
7986 {
8087 ml::core::CWordDictionary::TWeightVerbs5Other2 weighter;
8188
82- BOOST_REQUIRE_EQUAL (size_t (0 ), weighter (ml::core::CWordDictionary::E_NotInDictionary));
83- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_UnknownPart));
84- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Noun));
85- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Plural));
86- BOOST_REQUIRE_EQUAL (size_t (5 ), weighter (ml::core::CWordDictionary::E_Verb));
87- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Adjective));
88- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Adverb));
89- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Conjunction));
90- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Preposition));
91- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Interjection));
92- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_Pronoun));
93- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_DefiniteArticle));
94- BOOST_REQUIRE_EQUAL (size_t (2 ), weighter (ml::core::CWordDictionary::E_IndefiniteArticle));
89+ BOOST_REQUIRE_EQUAL (0 , weighter (ml::core::CWordDictionary::E_NotInDictionary));
90+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_UnknownPart));
91+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Noun));
92+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Plural));
// As in part 1, the expectations after reset() are the plain per-part-of-speech
// weights, so reset() must not change this functor's results either.
93+ weighter.reset (); // should make no difference
94+ BOOST_REQUIRE_EQUAL (5 , weighter (ml::core::CWordDictionary::E_Verb));
95+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adjective));
96+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adverb));
97+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Conjunction));
98+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Preposition));
99+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Interjection));
100+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Pronoun));
101+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_DefiniteArticle));
102+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_IndefiniteArticle));
103+ // Any given token always gives the same weight, so min/max matching
104+ // should always be the same as the original
// Checked for every weight from 1 to 9 inclusive.
105+ for (std::size_t weight = 1 ; weight < 10 ; ++weight) {
106+ BOOST_REQUIRE_EQUAL (weight, weighter.minMatchingWeight (weight));
107+ BOOST_REQUIRE_EQUAL (weight, weighter.maxMatchingWeight (weight));
108+ }
95109 }
96110}
97111
// New test case (every line is added by this diff) for
// TWeightVerbs5Other2AdjacentBoost6, a weighting functor whose result depends
// on the calls that preceded it. The assertions are order-dependent: each
// expected value only holds given the exact sequence of earlier calls on the
// same weighter object, so statements here must not be reordered.
//
// What the expected values below show:
//  - E_NotInDictionary gives 0;
//  - the first two consecutive dictionary words give 2 (5 for E_Verb);
//  - the third and later consecutive dictionary words give 12 (30 for
//    E_Verb), i.e. 6 times those values;
//  - reset() or an E_NotInDictionary call returns the functor to the
//    unboosted values for the words that follow.
112+ BOOST_AUTO_TEST_CASE (testAdjacencyDependentWeightingFunctor) {
113+ ml::core::CWordDictionary::TWeightVerbs5Other2AdjacentBoost6 weighter;
114+
// Run of four dictionary words after a non-dictionary one: 2, 2, then boosted.
115+ BOOST_REQUIRE_EQUAL (0 , weighter (ml::core::CWordDictionary::E_NotInDictionary));
116+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_UnknownPart));
117+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Noun));
118+ BOOST_REQUIRE_EQUAL (12 , weighter (ml::core::CWordDictionary::E_Plural));
119+ BOOST_REQUIRE_EQUAL (30 , weighter (ml::core::CWordDictionary::E_Verb));
120+ weighter.reset ();
121+ // Explicit reset stops adjacency multiplier
// New run of four dictionary words: 5, 2, then boosted 12, 12.
122+ BOOST_REQUIRE_EQUAL (5 , weighter (ml::core::CWordDictionary::E_Verb));
123+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adjective));
124+ BOOST_REQUIRE_EQUAL (12 , weighter (ml::core::CWordDictionary::E_Adverb));
125+ BOOST_REQUIRE_EQUAL (12 , weighter (ml::core::CWordDictionary::E_Conjunction));
126+ BOOST_REQUIRE_EQUAL (0 , weighter (ml::core::CWordDictionary::E_NotInDictionary));
127+ // Non-dictionary word stops adjacency multiplier
128+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Noun));
129+ BOOST_REQUIRE_EQUAL (5 , weighter (ml::core::CWordDictionary::E_Verb));
130+ weighter.reset ();
131+ // Explicit reset stops adjacency multiplier
// Without the reset this would be the third consecutive dictionary word.
132+ BOOST_REQUIRE_EQUAL (2 , weighter (ml::core::CWordDictionary::E_Adjective));
133+
134+ // Of the possible weights, 3 could map to 13 and 6 to 31 depending on
135+ // whether adjacency weighting takes place
// For each such pair, minMatchingWeight() is expected to return the lower
// value and maxMatchingWeight() the higher one, whichever member of the pair
// is passed in; 1 is expected to map to itself in both directions.
// NOTE(review): 3/13 and 6/31 are each one more than the functor outputs
// 2/12 and 5/30 asserted above - presumably the weights being matched carry
// an extra base of 1; confirm against the CWordDictionary implementation.
136+ BOOST_REQUIRE_EQUAL (1 , weighter.minMatchingWeight (1 ));
137+ BOOST_REQUIRE_EQUAL (1 , weighter.maxMatchingWeight (1 ));
138+ BOOST_REQUIRE_EQUAL (3 , weighter.minMatchingWeight (3 ));
139+ BOOST_REQUIRE_EQUAL (13 , weighter.maxMatchingWeight (3 ));
140+ BOOST_REQUIRE_EQUAL (6 , weighter.minMatchingWeight (6 ));
141+ BOOST_REQUIRE_EQUAL (31 , weighter.maxMatchingWeight (6 ));
142+ BOOST_REQUIRE_EQUAL (3 , weighter.minMatchingWeight (13 ));
143+ BOOST_REQUIRE_EQUAL (13 , weighter.maxMatchingWeight (13 ));
144+ BOOST_REQUIRE_EQUAL (6 , weighter.minMatchingWeight (31 ));
145+ BOOST_REQUIRE_EQUAL (31 , weighter.maxMatchingWeight (31 ));
146+ }
147+
98148// Disabled because it doesn't assert anything
99149// Can be run on an ad hoc basis if performance is of interest
100150BOOST_AUTO_TEST_CASE (testPerformance, *boost::unit_test::disabled ()) {
@@ -104,8 +154,8 @@ BOOST_AUTO_TEST_CASE(testPerformance, *boost::unit_test::disabled()) {
104154 LOG_INFO (<< " Starting word dictionary throughput test at "
105155 << ml::core::CTimeUtils::toTimeString (start));
106156
107- static const size_t TEST_SIZE (100000 );
108- for (size_t count = 0 ; count < TEST_SIZE; ++count) {
157+ static const std:: size_t TEST_SIZE (100000 );
158+ for (std:: size_t count = 0 ; count < TEST_SIZE; ++count) {
109159 dict.isInDictionary (" hello" );
110160 dict.isInDictionary (" Hello" );
111161 dict.isInDictionary (" HELLO" );
0 commit comments