14
14
15
15
VLLM_RINGBUFFER_WARNING_INTERVAL = envs .VLLM_RINGBUFFER_WARNING_INTERVAL
16
16
17
+ # time to wait if the queue is full or empty
18
+ # if the sleep is too short, the polling loop will consume too much CPU
19
+ # if the sleep is too long, it will add latency to the writer/reader
20
+ # 0.1 us is a good balance
21
+ RINGBUFFER_SLEEP_INTERVAL = 1e-7
22
+
17
23
logger = init_logger (__name__ )
18
24
19
25
@@ -145,28 +151,29 @@ def __init__(self, buffer: ShmRingBuffer, reader_rank: int):
145
151
@contextmanager
146
152
def acquire_write (self ):
147
153
assert self ._is_writer , "Only writers can acquire write"
148
- start_index = self .current_idx
149
- start_time = time .time ()
154
+ start_time = time .monotonic ()
150
155
n_warning = 1
151
156
while True :
152
157
with self .buffer .get_metadata (self .current_idx ) as metadata_buffer :
153
158
read_count = sum (metadata_buffer [1 :])
154
159
written_flag = metadata_buffer [0 ]
155
160
if written_flag and read_count != self .buffer .n_reader :
156
161
# this block is written and not read by all readers
157
- # try to write to the next block
158
- self .current_idx = (self .current_idx +
159
- 1 ) % self .buffer .max_chunks
160
- if self .current_idx == start_index :
161
- # no empty block found
162
- if time .time (
163
- ) - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning : # noqa
164
- logger .warning (
165
- "No available block found in %s second. " ,
166
- VLLM_RINGBUFFER_WARNING_INTERVAL )
167
- n_warning += 1
168
- # wait for a while (0.1 us)
169
- time .sleep (1e-7 )
162
+ # for writers, `self.current_idx` is the next block to write
163
+ # if this block is not ready to write,
164
+ # we need to wait until it is read by all readers
165
+
166
+ # wait for a while
167
+ time .sleep (RINGBUFFER_SLEEP_INTERVAL )
168
+
169
+ # if we wait for a long time, we should warn the user
170
+ if time .monotonic (
171
+ ) - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning : # noqa
172
+ logger .warning (
173
+ "No available block found in %s second. " ,
174
+ VLLM_RINGBUFFER_WARNING_INTERVAL )
175
+ n_warning += 1
176
+
170
177
continue
171
178
# found a block that is either
172
179
# (1) not written
@@ -188,13 +195,14 @@ def acquire_write(self):
188
195
metadata_buffer [i ] = 0
189
196
# mark the block as written
190
197
metadata_buffer [0 ] = 1
198
+ self .current_idx = (self .current_idx +
199
+ 1 ) % self .buffer .max_chunks
191
200
break
192
201
193
202
@contextmanager
194
203
def acquire_read (self ):
195
204
assert self ._is_reader , "Only readers can acquire read"
196
- start_index = self .current_idx
197
- start_time = time .time ()
205
+ start_time = time .monotonic ()
198
206
n_warning = 1
199
207
while True :
200
208
with self .buffer .get_metadata (self .current_idx ) as metadata_buffer :
@@ -204,19 +212,22 @@ def acquire_read(self):
204
212
# this block is either
205
213
# (1) not written
206
214
# (2) already read by this reader
207
- # try to read the next block
208
- self .current_idx = (self .current_idx +
209
- 1 ) % self .buffer .max_chunks
210
- if self .current_idx == start_index :
211
- # no block found
212
- if time .time (
213
- ) - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning : # noqa
214
- logger .warning (
215
- "No available block found in %s second. " ,
216
- VLLM_RINGBUFFER_WARNING_INTERVAL )
217
- n_warning += 1
218
- # wait for a while (0.1 us)
219
- time .sleep (1e-7 )
215
+
216
+ # for readers, `self.current_idx` is the next block to read
217
+ # if this block is not ready,
218
+ # we need to wait until it is written
219
+
220
+ # wait for a while
221
+ time .sleep (RINGBUFFER_SLEEP_INTERVAL )
222
+
223
+ # if we wait for a long time, we should warn the user
224
+ if time .monotonic (
225
+ ) - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning : # noqa
226
+ logger .warning (
227
+ "No available block found in %s second. " ,
228
+ VLLM_RINGBUFFER_WARNING_INTERVAL )
229
+ n_warning += 1
230
+
220
231
continue
221
232
# found a block that is not read by this reader
222
233
# let caller read from the buffer
@@ -226,6 +237,8 @@ def acquire_read(self):
226
237
# caller has read from the buffer
227
238
# set the read flag
228
239
metadata_buffer [self .reader_rank + 1 ] = 1
240
+ self .current_idx = (self .current_idx +
241
+ 1 ) % self .buffer .max_chunks
229
242
break
230
243
231
244
def enqueue (self , obj ):
0 commit comments