@@ -107,30 +107,60 @@ def get_lines_for_byte_range(path: str, start_offset: int, end_offset: int, inde
107107 return result
108108
109109
def get_line_range(path: str, start_line: int, end_line: int, file_index: 'FileIndex | None' = None) -> list[str]:
    """Get lines from start_line to end_line (inclusive, 1-based).

    When an index with line checkpoints is supplied, the file is opened in
    binary mode, positioned at the byte offset of the nearest checkpoint at or
    before ``start_line``, and scanned forward from there. Without a usable
    checkpoint the file is scanned linearly from the beginning in text mode.

    Args:
        path: File path
        start_line: Starting line number (1-based)
        end_line: Ending line number (1-based, inclusive)
        file_index: Optional pre-loaded FileIndex for efficient seeking. Only
            its ``line_index`` attribute is read; each entry is indexed as
            ``[line_number, byte_offset]``.

    Returns:
        List of lines in the range with trailing newline / carriage-return
        characters stripped. Empty when either bound is below 1, when the
        range is inverted (start_line > end_line), or when the file has
        fewer than start_line lines.
    """
    from itertools import islice

    # Reject invalid and inverted ranges up front so that no file is opened
    # (and nothing is seeked or scanned) just to return an empty list.
    if start_line < 1 or end_line < 1 or start_line > end_line:
        return []

    # Find the closest checkpoint at or before start_line.
    # line_index format: [[line_number, byte_offset], ...]
    # Walking the entries backwards yields the closest checkpoint first,
    # presumably because the entries are stored in ascending line order.
    start_offset = None
    checkpoint_start_line = None
    if file_index and file_index.line_index:
        for entry in reversed(file_index.line_index):
            if entry[0] <= start_line:
                checkpoint_start_line = entry[0]
                start_offset = entry[1]
                break

    if start_offset is not None:
        # Efficient path: seek to the checkpoint, skip the lines between the
        # checkpoint and start_line, then take exactly the requested count.
        lines_to_skip = start_line - checkpoint_start_line
        lines_to_take = end_line - start_line + 1
        with open(path, 'rb') as f:
            f.seek(start_offset)
            # NOTE(review): binary iteration ends lines on b'\n' only, while
            # the text-mode fallback below uses universal newlines and also
            # ends lines on a bare '\r'. The two paths can therefore number
            # lines differently for files with bare '\r' terminators -
            # confirm which convention the index builder uses.
            return [
                raw_line.decode('utf-8', errors='replace').rstrip('\n\r')
                for raw_line in islice(f, lines_to_skip, lines_to_skip + lines_to_take)
            ]

    # Fallback: linear scan from the beginning (for small files without index).
    # islice stops right after end_line instead of reading one line further.
    with open(path, encoding='utf-8', errors='replace') as f:
        return [line.rstrip('\n\r') for line in islice(f, start_line - 1, end_line)]
136166
@@ -360,8 +390,6 @@ def samples_command(
360390 try :
361391 # Handle compressed files separately
362392 if file_is_compressed :
363- line_list = list (line_offset )
364-
365393 # Check if this is a seekable zstd file
366394 from rx .seekable_zstd import is_seekable_zstd
367395
@@ -374,70 +402,121 @@ def samples_command(
374402 index = get_or_build_index (path )
375403 frames = read_seek_table (path )
376404
377- # Get samples for each line using seekable zstd
405+ # Get samples for each line/range using seekable zstd
378406 context_data = {}
379407 line_to_offset = {}
380- for line_num in line_list :
381- # Find which frame contains this line
382- frame_idx = None
383- frame_info = None
384- for frame in index .frames :
385- if frame .first_line <= line_num <= frame .last_line :
386- frame_idx = frame .index
387- frame_info = frame
388- first_line = frame .first_line
389- break
390-
391- if frame_idx is None :
392- context_data [line_num ] = []
393- line_to_offset [str (line_num )] = - 1
394- continue
395-
396- # Calculate byte offset for this line
397- # Start with the frame's starting offset, then add bytes for each line before the target
398- frame_offset = frames [frame_idx ].decompressed_offset
399-
400- # Decompress the frame to calculate exact offset
401- frame_data = decompress_frame (path , frame_idx , frames )
402- frame_lines = frame_data .decode ('utf-8' , errors = 'replace' ).split ('\n ' )
403-
404- # Calculate line index within frame (0-based)
405- line_in_frame = line_num - first_line
406-
407- # Calculate byte offset by summing lengths of lines before target
408- byte_offset = frame_offset
409- for i in range (line_in_frame ):
410- byte_offset += len (frame_lines [i ].encode ('utf-8' )) + 1 # +1 for newline
411-
412- line_to_offset [str (line_num )] = byte_offset
413-
414- # Get context lines
415- start_idx = max (0 , line_in_frame - before_context )
416- end_idx = min (len (frame_lines ), line_in_frame + after_context + 1 )
417-
418- context_data [line_num ] = frame_lines [start_idx :end_idx ]
408+
409+ for start , end in parsed_offsets :
410+ if end is None :
411+ # Single line - use context
412+ line_num = start
413+
414+ # Find which frame contains this line
415+ frame_idx = None
416+ for frame in index .frames :
417+ if frame .first_line <= line_num <= frame .last_line :
418+ frame_idx = frame .index
419+ first_line = frame .first_line
420+ break
421+
422+ if frame_idx is None :
423+ context_data [line_num ] = []
424+ line_to_offset [str (line_num )] = - 1
425+ continue
426+
427+ # Calculate byte offset for this line
428+ frame_offset = frames [frame_idx ].decompressed_offset
429+
430+ # Decompress the frame to calculate exact offset
431+ frame_data = decompress_frame (path , frame_idx , frames )
432+ frame_lines = frame_data .decode ('utf-8' , errors = 'replace' ).split ('\n ' )
433+
434+ # Calculate line index within frame (0-based)
435+ line_in_frame = line_num - first_line
436+
437+ # Calculate byte offset by summing lengths of lines before target
438+ byte_offset = frame_offset
439+ for i in range (line_in_frame ):
440+ byte_offset += len (frame_lines [i ].encode ('utf-8' )) + 1 # +1 for newline
441+
442+ line_to_offset [str (line_num )] = byte_offset
443+
444+ # Get context lines
445+ start_idx = max (0 , line_in_frame - before_context )
446+ end_idx = min (len (frame_lines ), line_in_frame + after_context + 1 )
447+
448+ context_data [line_num ] = frame_lines [start_idx :end_idx ]
449+ else :
450+ # Range - get exact lines, ignore context
451+ range_key = f'{ start } -{ end } '
452+ range_lines = []
453+
454+ # Collect all lines in the range, potentially across multiple frames
455+ for line_num in range (start , end + 1 ):
456+ # Find which frame contains this line
457+ frame_idx = None
458+ for frame in index .frames :
459+ if frame .first_line <= line_num <= frame .last_line :
460+ frame_idx = frame .index
461+ first_line = frame .first_line
462+ break
463+
464+ if frame_idx is None :
465+ continue
466+
467+ # Decompress the frame
468+ frame_data = decompress_frame (path , frame_idx , frames )
469+ frame_lines = frame_data .decode ('utf-8' , errors = 'replace' ).split ('\n ' )
470+
471+ # Calculate line index within frame (0-based)
472+ line_in_frame = line_num - first_line
473+ if 0 <= line_in_frame < len (frame_lines ):
474+ range_lines .append (frame_lines [line_in_frame ])
475+
476+ context_data [range_key ] = range_lines
477+ line_to_offset [range_key ] = - 1
419478 else :
420479 # Use generic compressed index for other formats
421480 click .echo (f'Processing compressed file ({ compression_format .value } )...' , err = True )
422481 index_data = get_or_build_compressed_index (path )
423482
424- # Get samples for each line
483+ # Get samples for each line/range
425484 context_data = {}
426- for line_num in line_list :
427- lines = get_decompressed_content_at_line (
428- path ,
429- line_num ,
430- context_before = before_context ,
431- context_after = after_context ,
432- index_data = index_data ,
433- )
434- context_data [line_num ] = lines
485+ line_to_offset = {}
486+ for start , end in parsed_offsets :
487+ if end is None :
488+ # Single line - use context
489+ lines = get_decompressed_content_at_line (
490+ path ,
491+ start ,
492+ context_before = before_context ,
493+ context_after = after_context ,
494+ index_data = index_data ,
495+ )
496+ context_data [start ] = lines
497+ line_to_offset [str (start )] = - 1
498+ else :
499+ # Range - get exact lines, ignore context
500+ range_key = f'{ start } -{ end } '
501+ range_lines = []
502+ for line_num in range (start , end + 1 ):
503+ lines = get_decompressed_content_at_line (
504+ path ,
505+ line_num ,
506+ context_before = 0 ,
507+ context_after = 0 ,
508+ index_data = index_data ,
509+ )
510+ if lines :
511+ range_lines .extend (lines )
512+ context_data [range_key ] = range_lines
513+ line_to_offset [range_key ] = - 1
435514
436515 # Use calculated offsets for seekable zstd, -1 for other formats
437516 if is_seekable_zstd (path ):
438517 lines_dict = line_to_offset
439518 else :
440- lines_dict = { str ( ln ): - 1 for ln in line_list }
519+ lines_dict = line_to_offset
441520
442521 response = SamplesResponse (
443522 path = path ,
@@ -528,7 +607,7 @@ def samples_command(
528607 else :
529608 # Range - get exact lines, ignore context
530609 range_key = f'{ start } -{ end } '
531- lines = get_line_range (path , start , end )
610+ lines = get_line_range (path , start , end , index_data )
532611 context_data [range_key ] = lines
533612 # For ranges, byte offset is not meaningful - use -1 to skip expensive calculation
534613 line_to_offset [range_key ] = - 1
0 commit comments