Skip to content

Commit 06024f2

Browse files
Alexandre Oliva authored and Linus Torvalds committed
[PATCH] x86-64: bitops fix for -Os
This fixes the x86-64 find_[first|next]_zero_bit() functions for the end-of-range case. They didn't test for a zero size, and the "rep scas" would do entirely the wrong thing.

Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent cfa024f commit 06024f2

File tree

1 file changed

+50
-16
lines changed

1 file changed

+50
-16
lines changed

arch/x86_64/lib/bitops.c

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,23 @@
55
#undef find_first_bit
66
#undef find_next_bit
77

8-
/**
9-
* find_first_zero_bit - find the first zero bit in a memory region
10-
* @addr: The address to start the search at
11-
* @size: The maximum size to search
12-
*
13-
* Returns the bit-number of the first zero bit, not the number of the byte
14-
* containing a bit.
15-
*/
16-
inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
8+
static inline long
9+
__find_first_zero_bit(const unsigned long * addr, unsigned long size)
1710
{
1811
long d0, d1, d2;
1912
long res;
2013

14+
/*
15+
* We must test the size in words, not in bits, because
16+
* otherwise incoming sizes in the range -63..-1 will not run
17+
* any scasq instructions, and then the flags used by the je
18+
* instruction will have whatever random value was in place
19+
* before. Nobody should call us like that, but
20+
* find_next_zero_bit() does when offset and size are at the
21+
* same word and it fails to find a zero itself.
22+
*/
23+
size += 63;
24+
size >>= 6;
2125
if (!size)
2226
return 0;
2327
asm volatile(
@@ -30,11 +34,29 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
3034
" shlq $3,%%rdi\n"
3135
" addq %%rdi,%%rdx"
3236
:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
33-
:"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL),
34-
[addr] "r" (addr) : "memory");
37+
:"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
38+
[addr] "S" (addr) : "memory");
39+
/*
40+
* Any register would do for [addr] above, but GCC tends to
41+
* prefer rbx over rsi, even though rsi is readily available
42+
* and doesn't have to be saved.
43+
*/
3544
return res;
3645
}
3746

47+
/**
48+
* find_first_zero_bit - find the first zero bit in a memory region
49+
* @addr: The address to start the search at
50+
* @size: The maximum size to search
51+
*
52+
* Returns the bit-number of the first zero bit, not the number of the byte
53+
* containing a bit.
54+
*/
55+
long find_first_zero_bit(const unsigned long * addr, unsigned long size)
56+
{
57+
return __find_first_zero_bit (addr, size);
58+
}
59+
3860
/**
3961
* find_next_zero_bit - find the first zero bit in a memory region
4062
* @addr: The address to base the search on
@@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size)
4365
*/
4466
long find_next_zero_bit (const unsigned long * addr, long size, long offset)
4567
{
46-
unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
68+
const unsigned long * p = addr + (offset >> 6);
4769
unsigned long set = 0;
4870
unsigned long res, bit = offset&63;
4971

@@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset)
6385
/*
6486
* No zero yet, search remaining full words for a zero
6587
*/
66-
res = find_first_zero_bit ((const unsigned long *)p,
67-
size - 64 * (p - (unsigned long *) addr));
88+
res = __find_first_zero_bit (p, size - 64 * (p - addr));
89+
6890
return (offset + set + res);
6991
}
7092

@@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
7496
long d0, d1;
7597
long res;
7698

99+
/*
100+
* We must test the size in words, not in bits, because
101+
* otherwise incoming sizes in the range -63..-1 will not run
102+
* any scasq instructions, and then the flags used by the jz
103+
* instruction will have whatever random value was in place
104+
* before. Nobody should call us like that, but
105+
* find_next_bit() does when offset and size are at the same
106+
* word and it fails to find a one itself.
107+
*/
108+
size += 63;
109+
size >>= 6;
110+
if (!size)
111+
return 0;
77112
asm volatile(
78113
" repe; scasq\n"
79114
" jz 1f\n"
@@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size)
83118
" shlq $3,%%rdi\n"
84119
" addq %%rdi,%%rax"
85120
:"=a" (res), "=&c" (d0), "=&D" (d1)
86-
:"0" (0ULL),
87-
"1" ((size + 63) >> 6), "2" (addr),
121+
:"0" (0ULL), "1" (size), "2" (addr),
88122
[addr] "r" (addr) : "memory");
89123
return res;
90124
}

0 commit comments

Comments
 (0)