25
25
from synapse .server import HomeServer
26
26
from synapse .storage import DataStore
27
27
from synapse .storage .background_updates import _BackgroundUpdateHandler
28
+ from synapse .storage .databases .main import user_directory
29
+ from synapse .storage .databases .main .user_directory import (
30
+ _parse_words_with_icu ,
31
+ _parse_words_with_regex ,
32
+ )
28
33
from synapse .storage .roommember import ProfileInfo
29
34
from synapse .util import Clock
30
35
42
47
BOB = "@bob:b"
43
48
BOBBY = "@bobby:a"
44
49
# The localpart isn't 'Bela' on purpose so we can test looking up display names.
45
- BELA = "@somenickname:a "
50
+ BELA = "@somenickname:example.org "
46
51
47
52
48
53
class GetUserDirectoryTables :
@@ -423,6 +428,8 @@ async def mocked_process_users(*args: Any, **kwargs: Any) -> int:
423
428
424
429
425
430
class UserDirectoryStoreTestCase (HomeserverTestCase ):
431
+ use_icu = False
432
+
426
433
def prepare (self , reactor : MemoryReactor , clock : Clock , hs : HomeServer ) -> None :
427
434
self .store = hs .get_datastores ().main
428
435
@@ -434,6 +441,12 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
434
441
self .get_success (self .store .update_profile_in_user_dir (BELA , "Bela" , None ))
435
442
self .get_success (self .store .add_users_in_public_rooms ("!room:id" , (ALICE , BOB )))
436
443
444
+ self ._restore_use_icu = user_directory .USE_ICU
445
+ user_directory .USE_ICU = self .use_icu
446
+
447
def tearDown(self) -> None:
    """Reset ``user_directory.USE_ICU`` to the value stashed on this test case.

    The saved value is read from ``self._restore_use_icu``.
    """
    saved_flag = self._restore_use_icu
    user_directory.USE_ICU = saved_flag
449
+
437
450
def test_search_user_dir (self ) -> None :
438
451
# normally when alice searches the directory she should just find
439
452
# bob because bobby doesn't share a room with her.
@@ -478,6 +491,26 @@ def test_search_user_dir_stop_words(self) -> None:
478
491
{"user_id" : BELA , "display_name" : "Bela" , "avatar_url" : None },
479
492
)
480
493
494
@override_config({"user_directory": {"search_all_users": True}})
def test_search_user_dir_start_of_user_id(self) -> None:
    """Check that searching for the leading part of a user ID finds that user.

    Runs with ``search_all_users`` enabled and expects exactly one,
    non-limited, result: BELA's directory entry.
    """
    search = self.store.search_user_dir(ALICE, "somenickname:exa", 10)
    response = self.get_success(search)

    self.assertFalse(response["limited"])

    matches = response["results"]
    self.assertEqual(1, len(matches))
    self.assertDictEqual(
        matches[0],
        {"user_id": BELA, "display_name": "Bela", "avatar_url": None},
    )
506
+
507
+
508
class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase):
    """Variant of ``UserDirectoryStoreTestCase`` with ``use_icu`` set to True."""

    # Mark the whole class as skipped when the `icu` name is falsy.
    if not icu:
        skip = "Requires PyICU"

    # Overrides the class-level flag inherited from the parent test case.
    use_icu = True
513
+
481
514
482
515
class UserDirectoryICUTestCase (HomeserverTestCase ):
483
516
if not icu :
@@ -513,3 +546,31 @@ def test_icu_word_boundary(self) -> None:
513
546
r ["results" ][0 ],
514
547
{"user_id" : ALICE , "display_name" : display_name , "avatar_url" : None },
515
548
)
549
+
550
def test_icu_word_boundary_punctuation(self) -> None:
    """
    Checks how the ICU tokeniser splits text that contains punctuation.

    The outcome seems to depend on the underlying ICU version, so more
    than one tokenisation is accepted.
    """
    # Either tokenisation is fine, because Postgres splits the words
    # itself afterwards.
    icu_66_words = ["lazy'fox", "jumped", "over", "the", "dog"]  # ICU 66 on Ubuntu 20.04
    icu_70_words = ["lazy'fox", "jumped:over", "the.dog"]  # ICU 70 on Ubuntu 22.04

    words = _parse_words_with_icu("lazy'fox jumped:over the.dog")
    self.assertIn(words, (icu_66_words, icu_70_words))
568
+
569
def test_regex_word_boundary_punctuation(self) -> None:
    """
    Checks how the non-ICU tokeniser (``_parse_words_with_regex``) splits
    text that contains punctuation.
    """
    expected_words = ["lazy", "fox", "jumped", "over", "the", "dog"]
    words = _parse_words_with_regex("lazy'fox jumped:over the.dog")
    self.assertEqual(words, expected_words)
0 commit comments