Skip to content

Commit 8dec2bd

Browse files
committed
Improve efficiency of slicing
1 parent 684726f commit 8dec2bd

File tree

1 file changed

+76
-45
lines changed

1 file changed

+76
-45
lines changed

libcachesim/trace_reader.py

Lines changed: 76 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -26,40 +26,72 @@ class TraceReaderSliceIterator:
2626
"""Iterator for sliced TraceReader."""
2727

2828
def __init__(self, reader: "TraceReader", start: int, stop: int, step: int):
29-
self.reader = reader
29+
# Clone the reader to avoid side effects on the original
30+
self.reader = reader.clone()
3031
self.start = start
3132
self.stop = stop
3233
self.step = step
3334
self.current = start
3435

36+
# Initialize position: reset and skip to start position once
37+
self.reader.reset()
38+
if start > 0:
39+
self._skip_to_start_position(start)
40+
3541
def __iter__(self) -> Iterator[Request]:
3642
return self
3743

3844
def __next__(self) -> Request:
3945
if self.current >= self.stop:
4046
raise StopIteration
4147

42-
# Reset reader and skip to current position
43-
self.reader.reset()
48+
# Read the current request
49+
try:
50+
req = self.reader.read_one_req()
51+
except RuntimeError:
52+
raise StopIteration
53+
54+
# Advance to next position based on step
55+
if self.step > 1:
56+
self._skip_requests(self.step - 1)
4457

45-
# Check if we can use skip_n_req or need to simulate with read_one_req
46-
# zstd files cannot use skip_n_req
58+
self.current += self.step
59+
return req
60+
61+
def _skip_to_start_position(self, position: int) -> None:
62+
"""Skip to the start position efficiently."""
4763
if not self.reader._reader.is_zstd_file:
48-
logger.debug(f"Skipping {self.current} requests using skip_n_req")
64+
# Try using skip_n_req for non-zstd files
65+
skipped = self.reader.skip_n_req(position)
66+
if skipped != position:
67+
# If we couldn't skip the expected number, simulate the rest
68+
remaining = position - skipped
69+
self._simulate_skip(remaining)
70+
else:
71+
# For zstd files, always simulate
72+
self._simulate_skip(position)
73+
74+
def _skip_requests(self, n: int) -> None:
75+
"""Skip n requests efficiently."""
76+
if not self.reader._reader.is_zstd_file:
77+
# Try using skip_n_req for non-zstd files
78+
skipped = self.reader.skip_n_req(n)
79+
if skipped != n:
80+
# If we couldn't skip all, we're likely at EOF
81+
self.current = self.stop # Mark as done
82+
else:
83+
# For zstd files, simulate
84+
self._simulate_skip(n)
85+
86+
def _simulate_skip(self, n: int) -> None:
87+
"""Simulate skip by reading requests one by one."""
88+
for _ in range(n):
4989
try:
50-
self.reader.skip_n_req(self.current)
51-
req = self.reader.read_one_req()
90+
self.reader.read_one_req()
5291
except RuntimeError:
53-
logger.warning(f"Failed to skip {self.current} requests, falling back to simulation")
54-
# Fallback to simulation if skip_n_req fails
55-
req = self.reader._simulate_skip_and_read_single(self.current)
56-
else:
57-
logger.debug(f"Simulating skip by reading {self.current} requests one by one")
58-
# Simulate skip by reading requests one by one
59-
req = self.reader._simulate_skip_and_read_single(self.current)
60-
61-
self.current += self.step
62-
return req
92+
# If we can't read more, we're at EOF
93+
self.current = self.stop # Mark as done
94+
break
6395

6496

6597
class TraceReader(ReaderProtocol):
@@ -359,36 +391,35 @@ def __getitem__(self, key: Union[int, slice]) -> Union[Request, TraceReaderSlice
359391

360392
self._reader.reset()
361393

362-
# Check if we can use skip_n_req or need to simulate
363-
if self._can_use_skip_n_req():
364-
try:
365-
self._reader.skip_n_req(key)
366-
req = Request()
367-
ret = self._reader.read_one_req(req)
368-
if ret != 0:
369-
raise RuntimeError("Failed to read request")
370-
return req
371-
except RuntimeError:
372-
# Fallback to simulation
373-
self._reader.reset()
374-
return self._simulate_skip_and_read_single(key)
375-
else:
376-
# Simulate skip by reading requests one by one
377-
return self._simulate_skip_and_read_single(key)
394+
# Try to skip to the target position
395+
if key > 0:
396+
if not self._reader.is_zstd_file:
397+
# For non-zstd files, try skip_n_req and check return value
398+
skipped = self._reader.skip_n_req(key)
399+
if skipped != key:
400+
# If we couldn't skip the expected number, simulate the rest
401+
remaining = key - skipped
402+
self._simulate_skip_single(remaining)
403+
else:
404+
# For zstd files, always simulate
405+
self._simulate_skip_single(key)
406+
407+
# Read the target request
408+
req = Request()
409+
ret = self._reader.read_one_req(req)
410+
if ret != 0:
411+
raise IndexError(f"Cannot read request at index {key}")
412+
return req
378413
else:
379414
raise TypeError("TraceReader indices must be integers or slices")
380415

381-
def _simulate_skip_and_read_single(self, index: int) -> Request:
382-
"""Simulate skip_n_req by reading requests one by one for single index access."""
383-
for _ in range(index):
416+
def _simulate_skip_single(self, n: int) -> None:
417+
"""Simulate skip by reading requests one by one for single index access."""
418+
for i in range(n):
384419
req = Request()
385420
ret = self._reader.read_one_req(req)
386421
if ret != 0:
387-
raise IndexError(f"Cannot reach index {index}")
388-
389-
# Read the target request
390-
req = Request()
391-
ret = self._reader.read_one_req(req)
392-
if ret != 0:
393-
raise IndexError(f"Cannot read request at index {index}")
394-
return req
422+
raise IndexError(f"Cannot skip to position, reached EOF at {i}")
423+
424+
# Note: Removed old inefficient methods _can_use_skip_n_req and _simulate_skip_and_read_single
425+
# The new implementation is more efficient and handles skip_n_req return values properly

0 commit comments

Comments
 (0)