-
Notifications
You must be signed in to change notification settings - Fork 149
/
Copy pathlf_hash.cc
848 lines (767 loc) · 27.1 KB
/
lf_hash.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
/* Copyright (c) 2006, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
Without limiting anything contained in the foregoing, this file,
which is part of C Driver for MySQL (Connector/C), is also subject to the
Universal FOSS Exception, version 1.0, a copy of which can be found at
http://oss.oracle.com/licenses/universal-foss-exception.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/**
@file mysys/lf_hash.cc
extensible hash
@todo
try to get rid of dummy nodes ?
for non-unique hash, count only _distinct_ values
(but how to do it in lf_hash_delete ?)
*/
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/types.h>
#include <atomic>
#include "lf.h"
#include "m_ctype.h"
#include "my_atomic.h"
#include "my_bit.h"
#include "my_compiler.h"
#include "my_inttypes.h"
#include "my_sys.h"
#include "mysql/service_mysql_alloc.h"
#include "mysys/mysys_priv.h"
#include "template_utils.h"
LF_REQUIRE_PINS(3)
/*
An element of the list.
This struct is only the node header; the element's data is stored in the
same allocation, immediately after it.
*/
struct LF_SLIST {
std::atomic<LF_SLIST *>
link; /* a pointer to the next element in a list and a flag */
/* (the lowest bit of link is the "deleted" flag, see PTR()/DELETED()) */
uint32 hashnr; /* reversed hash number, for sorting */
const uchar *key; /* key bytes (dummy nodes point at an empty key) */
size_t keylen; /* key length in bytes, 0 for dummy nodes */
/*
data is stored here, directly after the keylen.
thus the pointer to data is (void*)(slist_element_ptr+1)
*/
};
/* Size of the LF_SLIST header that precedes the data of every element. */
const int LF_HASH_OVERHEAD = sizeof(LF_SLIST);
/*
  Search context: pointers to three successive positions in a list.
  It is filled in by my_lfind()/my_lfind_match() and consumed by their
  callers (linsert, ldelete, ...).
*/
typedef struct {
  /* the link field (or the list head) that points at curr */
  std::atomic<LF_SLIST *> *prev;
  /* the node the search stopped on */
  LF_SLIST *curr;
  /* the successor of curr, with the "deleted" flag stripped */
  LF_SLIST *next;
} CURSOR;
/*
the last bit in LF_SLIST::link is a "deleted" flag.
the helper functions below convert it to a pure pointer or a pure flag
*/
/* Strip the "deleted" flag (the lowest bit) and return the pure pointer. */
template <class T>
static inline T *PTR(T *ptr) {
  const intptr_t deleted_flag = 1;
  const intptr_t raw = reinterpret_cast<intptr_t>(ptr);
  return reinterpret_cast<T *>(raw & ~deleted_flag);
}
/* Return true if the "deleted" flag (the lowest bit) is set in ptr. */
template <class T>
static inline bool DELETED(T *ptr) {
  return (reinterpret_cast<intptr_t>(ptr) & 1) != 0;
}
/* Return ptr with the "deleted" flag (the lowest bit) turned on. */
template <class T>
static inline T *SET_DELETED(T *ptr) {
  const intptr_t raw = reinterpret_cast<intptr_t>(ptr);
  return reinterpret_cast<T *>(raw | 1);
}
/*
DESCRIPTION
Search for hashnr/key/keylen in the list starting from 'head' and
position the cursor. The list is ORDER BY hashnr, key
Keys are compared with cmp_func when it is not null, otherwise with
my_strnncoll() and the collation 'cs'.
Nodes that carry the "deleted" flag are unlinked on the way (helping
the thread that deleted them).
RETURN
0 - not found
1 - found
NOTE
cursor is positioned in either case
pins[0..2] are used, they are NOT removed on return
(pins[0] holds cursor->next, pins[1] holds cursor->curr and pins[2]
holds the node that owns cursor->prev)
*/
static int my_lfind(std::atomic<LF_SLIST *> *head, lf_cmp_func *cmp_func,
CHARSET_INFO *cs, uint32 hashnr, const uchar *key,
size_t keylen, CURSOR *cursor, LF_PINS *pins) {
uint32 cur_hashnr;
const uchar *cur_key;
size_t cur_keylen;
LF_SLIST *link;
/* every time the list changes under us the scan restarts from 'head' */
retry:
cursor->prev = head;
/*
pin the first node: read it, pin it, then re-read to make sure the
pinned pointer is still the one stored in *prev
*/
do /* PTR() isn't necessary below, head is a dummy node */
{
cursor->curr = (LF_SLIST *)(*cursor->prev);
lf_pin(pins, 1, cursor->curr);
} while (*cursor->prev != cursor->curr && LF_BACKOFF);
for (;;) {
if (unlikely(!cursor->curr)) {
return 0; /* end of the list */
}
/* same read/pin/re-check sequence for the successor of curr */
do {
/* QQ: XXX or goto retry ? */
link = cursor->curr->link.load();
cursor->next = PTR(link);
lf_pin(pins, 0, cursor->next);
} while (link != cursor->curr->link && LF_BACKOFF);
/* copy the fields of curr before validating that curr is still linked */
cur_hashnr = cursor->curr->hashnr;
cur_key = cursor->curr->key;
cur_keylen = cursor->curr->keylen;
/* prev no longer points at curr - the list was modified, start over */
if (*cursor->prev != cursor->curr) {
(void)LF_BACKOFF;
goto retry;
}
if (!DELETED(link)) {
if (cur_hashnr >= hashnr) {
/* the list is sorted: a larger hashnr means the key is not there */
if (cur_hashnr > hashnr) {
return 0;
}
int r;
if (cmp_func != nullptr) {
/*
For the dummy node added by initialize_bucket(),
the key length is always zero.
So we need to handle such keys correctly even
if they can't be compared by compare function.
This code also handles empty keys belonging to real records
(if such empty keys are possible) by ordering them
before any real key.
Note that dummy nodes and real records are hashed into
separate values, due to the odd/even bit in the hash,
so that dummy nodes and real records belong to different
hash value spaces.
As a result, the compare function will never be called
to compare a dummy and a real record.
*/
if (cur_keylen == 0) {
if (keylen != 0) {
/* cur_key (empty) < key */
r = -1;
} else {
/* cur_key (empty) == key (empty) */
r = 0;
}
} else if (keylen == 0) {
/* cur_key > key (empty) */
r = +1;
} else {
r = (*cmp_func)(cur_key, cur_keylen, key, keylen);
}
} else {
/*
WARNING,
Works for both normal and dummy nodes with their empty keys.
*/
r = my_strnncoll(cs, cur_key, cur_keylen, key, keylen);
}
/* r == 0: found (return 1); r > 0: passed the position (return 0) */
if (r >= 0) {
return !r;
}
}
/* curr sorts before the key: step forward, keep curr pinned as prev */
cursor->prev = &(cursor->curr->link);
lf_pin(pins, 2, cursor->curr);
} else {
/*
we found a deleted node - be nice, help the other thread
and remove this deleted node
*/
if (atomic_compare_exchange_strong(cursor->prev, &cursor->curr,
cursor->next)) {
/* we unlinked it, so we are the one to hand it to the pinbox */
lf_pinbox_free(pins, cursor->curr);
} else {
(void)LF_BACKOFF;
goto retry;
}
}
/* advance to the successor, which is already pinned in pins[0] */
cursor->curr = cursor->next;
lf_pin(pins, 1, cursor->curr);
}
}
/**
Search for list element satisfying condition specified by match
function and position cursor on it.
Only normal nodes (odd reversed hash) whose hash value lies within
[first_hashnr, last_hashnr] are passed to the match function. Nodes
carrying the "deleted" flag are unlinked on the way.
@param head Head of the list to search in.
@param first_hashnr Hash value to start search from.
@param last_hashnr Hash value to stop search after.
@param match Match function.
@param cursor Cursor to be positioned.
@param pins LF_PINS for the calling thread to be used during
search for pinning result.
@param match_arg Argument passed to match function.
@retval 0 - not found
@retval 1 - found
@note pins[0..2] are used and are not removed on return.
*/
static int my_lfind_match(std::atomic<LF_SLIST *> *head, uint32 first_hashnr,
uint32 last_hashnr, lf_hash_match_func *match,
CURSOR *cursor, LF_PINS *pins, void *match_arg) {
uint32 cur_hashnr;
LF_SLIST *link;
/*
restart point after a concurrent modification; 'head' may have been
advanced to the last dummy node seen (see below)
*/
retry:
cursor->prev = head;
/* read the first node, pin it, re-check that *prev still points at it */
do /* PTR() isn't necessary below, head is a dummy node */
{
cursor->curr = (LF_SLIST *)(*cursor->prev);
lf_pin(pins, 1, cursor->curr);
} while (*cursor->prev != cursor->curr && LF_BACKOFF);
for (;;) {
if (unlikely(!cursor->curr)) {
return 0; /* end of the list */
}
/* same read/pin/re-check sequence for the successor of curr */
do {
/* QQ: XXX or goto retry ? */
link = cursor->curr->link.load();
cursor->next = PTR(link);
lf_pin(pins, 0, cursor->next);
} while (link != cursor->curr->link && LF_BACKOFF);
cur_hashnr = cursor->curr->hashnr;
/* prev no longer points at curr - the list was modified, start over */
if (*cursor->prev != cursor->curr) {
(void)LF_BACKOFF;
goto retry;
}
if (!DELETED(link)) {
if (cur_hashnr >= first_hashnr) {
/* past the end of the requested range - nothing matched */
if (cur_hashnr > last_hashnr) {
return 0;
}
if (cur_hashnr & 1) {
/* Normal node. Check if element matches condition. */
if ((*match)((uchar *)(cursor->curr + 1), match_arg)) {
return 1;
}
} else {
/*
Dummy node. Nothing to check here.
Still thanks to the fact that dummy nodes are never deleted we
can save it as a safe place to restart iteration if ever needed.
*/
head = &cursor->curr->link;
}
}
/* no match on this node: step forward, keep curr pinned as prev */
cursor->prev = &(cursor->curr->link);
lf_pin(pins, 2, cursor->curr);
} else {
/*
we found a deleted node - be nice, help the other thread
and remove this deleted node
*/
if (atomic_compare_exchange_strong(cursor->prev, &cursor->curr,
cursor->next)) {
lf_pinbox_free(pins, cursor->curr);
} else {
(void)LF_BACKOFF;
goto retry;
}
}
/* advance to the successor, which is already pinned in pins[0] */
cursor->curr = cursor->next;
lf_pin(pins, 1, cursor->curr);
}
}
/*
DESCRIPTION
insert a 'node' in the list that starts from 'head' in the correct
position (as found by my_lfind)
With LF_HASH_UNIQUE set in 'flags' the node is not inserted when an
element with the same hashnr and key is already present.
RETURN
0 - inserted
not 0 - a pointer to a duplicate (not pinned and thus unusable)
NOTE
it uses pins[0..2], on return all pins are removed.
if there're nodes with the same key value, a new node is added before them.
*/
static LF_SLIST *linsert(std::atomic<LF_SLIST *> *head, lf_cmp_func *cmp_func,
CHARSET_INFO *cs, LF_SLIST *node, LF_PINS *pins,
uint flags) {
CURSOR cursor;
/* local status: 1 - node was linked in, 0 - a duplicate blocked the insert */
int res;
/* search and CAS are repeated until the insert succeeds or is refused */
for (;;) {
if (my_lfind(head, cmp_func, cs, node->hashnr, node->key, node->keylen,
&cursor, pins) &&
(flags & LF_HASH_UNIQUE)) {
res = 0; /* duplicate found */
break;
} else {
/* link the new node in front of cursor.curr */
node->link = cursor.curr;
assert(node->link != node); /* no circular references */
assert(cursor.prev != &node->link); /* no circular references */
/* publish it; fails if *prev changed since my_lfind(), then retry */
if (atomic_compare_exchange_strong(cursor.prev, &cursor.curr, node)) {
res = 1; /* inserted ok */
break;
}
}
}
lf_unpin(pins, 0);
lf_unpin(pins, 1);
lf_unpin(pins, 2);
/*
Note that cursor.curr is not pinned here and the pointer is unreliable,
the object may disappear anytime. But if it points to a dummy node, the
pointer is safe, because dummy nodes are never freed - initialize_bucket()
uses this fact.
*/
return res ? nullptr : cursor.curr;
}
/*
DESCRIPTION
deletes a node as identified by hashnr/key/keylen from the list
that starts from 'head'
Deletion has two steps: the node is first marked by setting the
"deleted" flag in its link, then it is unlinked from the list.
RETURN
0 - ok
1 - not found
NOTE
it uses pins[0..2], on return all pins are removed.
*/
static int ldelete(std::atomic<LF_SLIST *> *head, lf_cmp_func *cmp_func,
CHARSET_INFO *cs, uint32 hashnr, const uchar *key,
uint keylen, LF_PINS *pins) {
CURSOR cursor;
int res;
/* repeat the search until the node is marked by us or is not found */
for (;;) {
if (!my_lfind(head, cmp_func, cs, hashnr, key, keylen, &cursor, pins)) {
res = 1; /* not found */
break;
} else {
/* mark the node deleted */
/* (the CAS fails if curr->link changed since my_lfind(); then retry) */
if (atomic_compare_exchange_strong(&cursor.curr->link, &cursor.next,
SET_DELETED(cursor.next))) {
/* and remove it from the list */
if (atomic_compare_exchange_strong(cursor.prev, &cursor.curr,
cursor.next)) {
lf_pinbox_free(pins, cursor.curr);
} else {
/*
somebody already "helped" us and removed the node ?
Let's check if we need to help that someone too!
(to ensure the number of "set DELETED flag" actions
is equal to the number of "remove from the list" actions)
*/
my_lfind(head, cmp_func, cs, hashnr, key, keylen, &cursor, pins);
}
res = 0;
break;
}
}
}
lf_unpin(pins, 0);
lf_unpin(pins, 1);
lf_unpin(pins, 2);
return res;
}
/*
  DESCRIPTION
    looks up the node identified by hashnr/key/keylen in the list
    that starts from 'head'
  RETURN
    nullptr - not found
    node    - found
  NOTE
    it uses pins[0..2]; on return pins[2] keeps the node that was found
    (it is set only when a node was found), pins[0] and pins[1] are removed.
*/
static LF_SLIST *my_lsearch(std::atomic<LF_SLIST *> *head,
                            lf_cmp_func *cmp_func, CHARSET_INFO *cs,
                            uint32 hashnr, const uchar *key, uint keylen,
                            LF_PINS *pins) {
  CURSOR pos;
  LF_SLIST *result = nullptr;
  if (my_lfind(head, cmp_func, cs, hashnr, key, keylen, &pos, pins)) {
    /* move the protection of the node to the pin that survives the call */
    lf_pin(pins, 2, pos.curr);
    result = pos.curr;
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  return result;
}
/*
  Locate the key inside a record.
  With a get_key() callback the callback decides both pointer and length;
  otherwise the key is the key_length bytes found at key_offset.
*/
static inline const uchar *hash_key(const LF_HASH *hash, const uchar *record,
                                    size_t *length) {
  if (!hash->get_key) {
    *length = hash->key_length;
    return record + hash->key_offset;
  }
  return (*hash->get_key)(record, length);
}
/*
  Compute the hash value of a raw key with the hash function of 'hash'.
  @note The result is masked with INT_MAX32, i.e. the highest bit is always
  clear, because one bit is needed to distinguish between normal and dummy
  nodes.
*/
static inline uint calc_hash(LF_HASH *hash, const uchar *key, size_t keylen) {
  const uint raw_hash = hash->hash_function(hash, key, keylen);
  return raw_hash & INT_MAX32;
}
/*
Load factor threshold: lf_hash_insert() tries to double LF_HASH::size when
the number of elements divided by the size exceeds this value.
*/
#define MAX_LOAD 1.0 /* average number of elements in a bucket */
/*
Forward declaration: initialize_bucket() is defined further down in this
file but is called by the lf_hash_* functions that precede its definition.
*/
static int initialize_bucket(LF_HASH *, std::atomic<LF_SLIST *> *, uint,
LF_PINS *);
/**
  Adaptor which lets LF_HASH use the hash_sort() function of the
  collation stored in LF_HASH::charset as its hash function.

  @param hash    Hash whose charset is used.
  @param key     Key bytes.
  @param length  Key length.
  @return The first accumulator produced by hash_sort(), truncated to uint.
*/
static uint cset_hash_sort_adapter(const LF_HASH *hash, const uchar *key,
                                   size_t length) {
  /* seed values for the two accumulators of hash_sort() */
  uint64 nr1 = 1;
  uint64 nr2 = 4;
  hash->charset->coll->hash_sort(hash->charset, key, length, &nr1, &nr2);
  return static_cast<uint>(nr1);
}
/*
  Initializes lf_hash, the arguments are compatible with hash_init

  @note element_size is used twice: it determines the size of the memory
  block allocated by lf_alloc (together with the LF_SLIST header) and it is
  the number of bytes memcpy'ed by lf_hash_insert. Typically these are the
  same. But LF_HASH::element_size can be decreased after lf_hash_init, and
  then lf_alloc will allocate a larger block than lf_hash_insert copies over.
  It is desirable if part of the element is expensive to initialize - for
  example, if there is a mutex or DYNAMIC_ARRAY. In this case, they should be
  initialized in the LF_ALLOCATOR::constructor, and lf_hash_insert should not
  overwrite them. See wt_init() for example.

  As an alternative to decreasing LF_HASH::element_size, one can provide an
  "initialize" hook that will finish initialization of an object provided by
  LF_ALLOCATOR and set the element key from the object passed as parameter to
  lf_hash_insert instead of doing simple memcpy.

  Key comparison is configured in one of two ways:
    - cmp_function given: hash_function is mandatory, charset must be null;
    - cmp_function null: keys are compared through the charset
      (my_charset_bin by default) and hashed with hash_function or, if it
      is null, with cset_hash_sort_adapter.
*/
void lf_hash_init_impl(LF_HASH *hash, uint element_size, uint flags,
                       uint key_offset, uint key_length,
                       hash_get_key_function get_key, CHARSET_INFO *charset,
                       lf_hash_func *hash_function, lf_cmp_func *cmp_function,
                       lf_allocator_func *ctor, lf_allocator_func *dtor,
                       lf_hash_init_func *init) {
  /* storage: node allocator and the array of buckets */
  lf_alloc_init2(&hash->alloc, sizeof(LF_SLIST) + element_size,
                 offsetof(LF_SLIST, key), ctor, dtor);
  lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));

  /* an empty hash starts with a single bucket */
  hash->size = 1;
  hash->count = 0;

  hash->element_size = element_size;
  hash->flags = flags;
  hash->key_offset = key_offset;
  hash->key_length = key_length;
  hash->get_key = get_key;
  hash->cmp_function = cmp_function;

  if (cmp_function == nullptr) {
    /* charset based comparison, both pieces have defaults */
    if (hash_function) {
      hash->hash_function = hash_function;
    } else {
      hash->hash_function = cset_hash_sort_adapter;
    }
    if (charset) {
      hash->charset = charset;
    } else {
      hash->charset = &my_charset_bin;
    }
  } else {
    /* custom comparison needs a matching custom hash and no charset */
    assert(hash_function != nullptr);
    assert(charset == nullptr);
    hash->hash_function = hash_function;
    hash->charset = nullptr;
  }

  hash->initialize = init;

  /*
    If a get_key() call back is provided:
    - key_offset should be 0
    - key_length should be 0
    - key_offset and key_length are not used.
    If no get_key() call back is provided:
    - key_length should be > 0
  */
  if (get_key) {
    assert(!key_offset && !key_length);
  } else {
    assert(key_length);
  }
}
/*
  Initialize a hash that has no custom key comparison function: keys are
  compared through 'charset' (see lf_hash_init_impl for the defaults).
*/
void lf_hash_init2(LF_HASH *hash, uint element_size, uint flags,
                   uint key_offset, uint key_length,
                   hash_get_key_function get_key, CHARSET_INFO *charset,
                   lf_hash_func *hash_function, lf_allocator_func *ctor,
                   lf_allocator_func *dtor, lf_hash_init_func *init) {
  lf_cmp_func *const no_cmp_function = nullptr;
  lf_hash_init_impl(hash, element_size, flags, key_offset, key_length, get_key,
                    charset, hash_function, no_cmp_function, ctor, dtor, init);
}
/*
  Initialize a hash that takes a key comparison function. No charset,
  key offset or key length is passed on; lf_hash_init_impl requires
  get_key in that case.
*/
void lf_hash_init3(LF_HASH *hash, uint element_size, uint flags,
                   hash_get_key_function get_key, lf_hash_func *hash_function,
                   lf_cmp_func *cmp_function, lf_allocator_func *ctor,
                   lf_allocator_func *dtor, lf_hash_init_func *init) {
  const uint unused_key_offset = 0;
  const uint unused_key_length = 0;
  lf_hash_init_impl(hash, element_size, flags, unused_key_offset,
                    unused_key_length, get_key, nullptr, hash_function,
                    cmp_function, ctor, dtor, init);
}
/*
  Free all resources of the hash: every node of the list that starts in
  bucket 0, then the node allocator and the bucket array.
  Normal nodes (odd hashnr) come from the allocator, dummy nodes (even
  hashnr) were allocated with my_malloc() by initialize_bucket().
*/
void lf_hash_destroy(LF_HASH *hash) {
  LF_SLIST **head = (LF_SLIST **)lf_dynarray_value(&hash->array, 0);
  if (unlikely(!head)) {
    /* bucket 0 has no slot: there is no list to walk */
    lf_alloc_destroy(&hash->alloc);
    return;
  }

  for (LF_SLIST *el = *head; el != nullptr;) {
    /* read the successor before the node is released */
    LF_SLIST *successor = el->link;
    const bool is_dummy = (el->hashnr & 1) == 0;
    if (is_dummy) {
      my_free(el); /* dummy node */
    } else {
      lf_alloc_direct_free(&hash->alloc, el); /* normal node */
    }
    el = successor;
  }

  lf_alloc_destroy(&hash->alloc);
  lf_dynarray_destroy(&hash->array);
}
/*
  DESCRIPTION
    inserts a new element to a hash. it will have a _copy_ of
    data, not a pointer to it (or, when an "initialize" hook is set,
    whatever the hook builds from data).
  RETURN
    0 - inserted
    1 - didn't (unique key conflict)
   -1 - out of memory
  NOTE
    see linsert() for pin usage notes
*/
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) {
  LF_SLIST *node = (LF_SLIST *)lf_alloc_new(pins);
  if (unlikely(!node)) {
    return -1;
  }

  /* The element itself lives right behind the node header. */
  uchar *payload = (uchar *)(node + 1);
  if (!hash->initialize) {
    memcpy(payload, data, hash->element_size);
  } else {
    (*hash->initialize)(payload, (const uchar *)data);
  }

  node->key = hash_key(hash, payload, &node->keylen);
  const int hashnr = calc_hash(hash, node->key, node->keylen);
  const int bucket = hashnr % hash->size;

  std::atomic<LF_SLIST *> *el = static_cast<std::atomic<LF_SLIST *> *>(
      lf_dynarray_lvalue(&hash->array, bucket));
  /* no slot for the bucket, or its dummy node could not be created */
  if (unlikely(!el) || (el->load() == nullptr &&
                        unlikely(initialize_bucket(hash, el, bucket, pins)))) {
    lf_pinbox_free(pins, node);
    return -1;
  }

  node->hashnr = my_reverse_bits(hashnr) | 1; /* normal node */
  LF_SLIST *duplicate =
      linsert(el, hash->cmp_function, hash->charset, node, pins, hash->flags);
  if (duplicate != nullptr) {
    lf_pinbox_free(pins, node);
    return 1;
  }

  /* grow the table (double the size) once the load factor is exceeded */
  int csize = hash->size;
  const double load = (hash->count.fetch_add(1) + 1.0) / csize;
  if (load > MAX_LOAD) {
    atomic_compare_exchange_strong(&hash->size, &csize, csize * 2);
  }
  return 0;
}
/*
  DESCRIPTION
    deletes an element with the given key from the hash (if a hash is
    not unique and there're many elements with this key - the "first"
    matching element is deleted)
  RETURN
    0 - deleted
    1 - didn't (not found)
   -1 - out of memory
  NOTE
    see ldelete() for pin usage notes
*/
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) {
  const uchar *key_bytes = pointer_cast<const uchar *>(key);
  const uint hashnr = calc_hash(hash, key_bytes, keylen);
  const uint bucket = hashnr % hash->size;

  std::atomic<LF_SLIST *> *el = static_cast<std::atomic<LF_SLIST *> *>(
      lf_dynarray_lvalue(&hash->array, bucket));
  if (unlikely(!el)) {
    return -1;
  }

  /*
    note that we still need to initialize_bucket here,
    we cannot return "node not found", because an old bucket of that
    node may've been split and the node was assigned to a new bucket
    that was never accessed before and thus is not initialized.
  */
  if (el->load() == nullptr &&
      unlikely(initialize_bucket(hash, el, bucket, pins))) {
    return -1;
  }

  const uint32 rev_hashnr = my_reverse_bits(hashnr) | 1; /* normal node */
  const int not_found = ldelete(el, hash->cmp_function, hash->charset,
                                rev_hashnr, key_bytes, keylen, pins);
  if (not_found) {
    return 1;
  }

  hash->count.fetch_sub(1);
  return 0;
}
/**
  Find hash element corresponding to the key.

  @param hash    The hash to search element in.
  @param pins    Pins for the calling thread which were earlier
                 obtained from this hash using lf_hash_get_pins().
  @param key     Key
  @param keylen  Key length

  @retval A pointer to an element with the given key (if a hash is not unique
          and there're many elements with this key - the "first" matching
          element).
  @retval NULL         - if nothing is found
  @retval MY_LF_ERRPTR - if OOM

  @note Uses pins[0..2]. On return pins[0..1] are removed and pins[2]
        is used to pin object found. It is also not removed in case when
        object is not found/error occurs but pin value is undefined in
        this case.
        So calling lf_hash_unpin() is mandatory after call to this function
        in case of both success and failure.
  @sa my_lsearch().
*/
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key,
                     uint keylen) {
  const uchar *key_bytes = pointer_cast<const uchar *>(key);
  const uint hashnr = calc_hash(hash, key_bytes, keylen);
  const uint bucket = hashnr % hash->size;

  std::atomic<LF_SLIST *> *el = static_cast<std::atomic<LF_SLIST *> *>(
      lf_dynarray_lvalue(&hash->array, bucket));
  if (unlikely(!el)) {
    return MY_LF_ERRPTR;
  }
  /* a bucket that was never accessed gets its dummy node first */
  if (el->load() == nullptr &&
      unlikely(initialize_bucket(hash, el, bucket, pins))) {
    return MY_LF_ERRPTR;
  }

  const uint32 rev_hashnr = my_reverse_bits(hashnr) | 1; /* normal node */
  LF_SLIST *found = my_lsearch(el, hash->cmp_function, hash->charset,
                               rev_hashnr, key_bytes, keylen, pins);
  if (found == nullptr) {
    return nullptr;
  }
  /* the element is stored right behind the node header */
  return found + 1;
}
/**
  Find random hash element which satisfies condition specified by
  match function.

  @param hash       Hash to search element in.
  @param pins       Pins for calling thread to be used during search
                    and for pinning its result.
  @param match      Pointer to match function. This function takes
                    pointer to object stored in hash as parameter
                    and returns 0 if object doesn't satisfy its
                    condition (and non-0 value if it does).
  @param rand_val   Random value to be used for selecting hash
                    bucket from which search in sort-ordered
                    list needs to be started.
  @param match_arg  Argument passed to match function.

  @retval A pointer to a random element matching condition.
  @retval NULL         - if nothing is found
  @retval MY_LF_ERRPTR - OOM.

  @note This function follows the same pinning protocol as lf_hash_search(),
        i.e. uses pins[0..2]. On return pins[0..1] are removed and pins[2]
        is used to pin object found. It is also not removed in case when
        object is not found/error occurs but its value is undefined in
        this case.
        So calling lf_hash_unpin() is mandatory after call to this function
        in case of both success and failure.
*/
void *lf_hash_random_match(LF_HASH *hash, LF_PINS *pins,
                           lf_hash_match_func *match, uint rand_val,
                           void *match_arg) {
  /* Convert random value to valid hash value. */
  const uint hashnr = (rand_val & INT_MAX32);
  const uint bucket = hashnr % hash->size;
  const uint32 rev_hashnr = my_reverse_bits(hashnr);

  std::atomic<LF_SLIST *> *el = static_cast<std::atomic<LF_SLIST *> *>(
      lf_dynarray_lvalue(&hash->array, bucket));
  if (unlikely(!el)) {
    return MY_LF_ERRPTR;
  }

  /*
    Bucket might be totally empty if it has not been accessed since last
    time LF_HASH::size has been increased. In this case we initialize it
    by inserting dummy node for this bucket to the correct position in
    split-ordered list. This should help future lf_hash_* calls trying to
    access the same bucket.
  */
  if (el->load() == nullptr &&
      unlikely(initialize_bucket(hash, el, bucket, pins))) {
    return MY_LF_ERRPTR;
  }

  /*
    To avoid bias towards the first matching element in the bucket, we start
    looking for elements with inversed hash value greater or equal than
    inversed value of our random hash.
  */
  CURSOR cursor;
  int res = my_lfind_match(el, rev_hashnr | 1, UINT_MAX32, match, &cursor,
                           pins, match_arg);

  if (!res && hashnr != 0) {
    /*
      We have not found matching element - probably we were too close to
      the tail of our split-ordered list. To avoid bias against elements
      at the head of the list we restart our search from its head. Unless
      we were already searching from it.
      To avoid going through elements at which we have already looked
      twice we stop once we reach element from which we have begun our
      first search.
    */
    el = static_cast<std::atomic<LF_SLIST *> *>(
        lf_dynarray_lvalue(&hash->array, 0));
    if (unlikely(!el)) {
      return MY_LF_ERRPTR;
    }
    res = my_lfind_match(el, 1, rev_hashnr, match, &cursor, pins, match_arg);
  }

  void *matched = nullptr;
  if (res) {
    /* keep the node pinned for the caller, hand out the element behind it */
    lf_pin(pins, 2, cursor.curr);
    matched = cursor.curr + 1;
  }
  lf_unpin(pins, 0);
  lf_unpin(pins, 1);
  return matched;
}
/* Empty key shared by all dummy nodes created in initialize_bucket(). */
static const uchar *dummy_key = pointer_cast<const uchar *>("");
/*
DESCRIPTION
Initialize a bucket: insert a dummy node for 'bucket' into the list
reachable from its parent bucket and store a pointer to it in '*node'.
An uninitialized parent bucket is initialized first (recursively).
RETURN
0 - ok
-1 - out of memory
*/
static int initialize_bucket(LF_HASH *hash, std::atomic<LF_SLIST *> *node,
uint bucket, LF_PINS *pins) {
/* NOTE(review): presumably 'bucket' with its highest set bit cleared */
uint parent = my_clear_highest_bit(bucket);
/* dummy nodes come from my_malloc(), not from the hash allocator */
LF_SLIST *dummy =
(LF_SLIST *)my_malloc(key_memory_lf_slist, sizeof(LF_SLIST), MYF(MY_WME));
if (unlikely(!dummy)) {
return -1;
}
LF_SLIST *tmp = nullptr, *cur;
std::atomic<LF_SLIST *> *el = static_cast<std::atomic<LF_SLIST *> *>(
lf_dynarray_lvalue(&hash->array, parent));
if (unlikely(!el)) {
my_free(dummy);
return -1;
}
/* the 'bucket' test ends the recursion: bucket 0 never recurses */
if (el->load() == nullptr && bucket &&
unlikely(initialize_bucket(hash, el, parent, pins))) {
my_free(dummy);
return -1;
}
/* the lowest bit of hashnr stays 0 for dummy nodes (normal nodes set it) */
dummy->hashnr = my_reverse_bits(bucket) | 0; /* dummy node */
dummy->key = dummy_key;
dummy->keylen = 0;
/*
linsert() returns the already present node if a dummy node for this
bucket is in the list; then ours is not needed and the existing one is
used instead
*/
if ((cur = linsert(el, hash->cmp_function, hash->charset, dummy, pins,
LF_HASH_UNIQUE))) {
my_free(dummy);
dummy = cur;
}
/* publish the dummy node in the bucket slot if the slot is still empty */
atomic_compare_exchange_strong(node, &tmp, dummy);
/*
note that if the CAS above failed (after linsert() succeeded),
it would mean that some other thread has executed linsert() for
the same dummy node, its linsert() failed, it picked up our
dummy node (in "dummy= cur") and executed the same CAS as above.
Which means that even if CAS above failed we don't need to retry,
and we should not free(dummy) - there's no memory leak here
*/
return 0;
}