@@ -50,7 +50,7 @@ def get_large_file_threshold_bytes() -> int:
5050 Controlled by RX_LARGE_TEXT_FILE_MB environment variable.
5151 Default: 100MB
5252 """
53- threshold_mb = get_int_env ("RX_LARGE_TEXT_FILE_MB " )
53+ threshold_mb = get_int_env ("DEFAULT_LARGE_FILE_MB " )
5454 if threshold_mb <= 0 :
5555 threshold_mb = DEFAULT_LARGE_FILE_MB
5656 return threshold_mb * 1024 * 1024
@@ -418,6 +418,170 @@ def find_line_offset(line_index: list[list[int]], target_line: int) -> tuple[int
418418 return (line_index [idx ][0 ], line_index [idx ][1 ])
419419
420420
421+ def calculate_exact_offset_for_line (filename : str , target_line : int , index_data : dict | None = None ) -> int :
422+ """Calculate the exact byte offset for a given line number.
423+
424+ Args:
425+ filename: Path to the file
426+ target_line: Line number (1-based) to find offset for
427+ index_data: Optional index data. If None, will try to load or calculate
428+
429+ Returns:
430+ Byte offset of the line, or -1 if cannot determine (large file without index)
431+ """
432+ # If no index provided, try to load it
433+ if index_data is None :
434+ index_path = get_index_path (filename )
435+ index_data = load_index (index_path )
436+
437+ # If we have an index, use it
438+ if index_data :
439+ line_index = index_data .get ("line_index" , [])
440+ if not line_index :
441+ return - 1
442+
443+ # Find closest indexed line
444+ indexed_line , indexed_offset = find_line_offset (line_index , target_line )
445+
446+ # If exact match, return it
447+ if indexed_line == target_line :
448+ return indexed_offset
449+
450+ # Read from indexed position and count to target
451+ # Sequential reading is fast due to OS buffering and disk read-ahead
452+ try :
453+ with open (filename , "rb" ) as f :
454+ f .seek (indexed_offset )
455+ current_line = indexed_line
456+ current_offset = indexed_offset
457+
458+ for line_bytes in f :
459+ if current_line == target_line :
460+ return current_offset
461+ current_offset += len (line_bytes )
462+ current_line += 1
463+
464+ # Reached EOF before finding target line
465+ return - 1
466+ except (IOError , OSError ) as e :
467+ logger .error (f"Failed to read file { filename } : { e } " )
468+ return - 1
469+
470+ # No index - check if file is small enough to read
471+ try :
472+ file_size = os .path .getsize (filename )
473+ threshold = get_large_file_threshold_bytes ()
474+
475+ if file_size > threshold :
476+ # Large file without index - cannot determine
477+ return - 1
478+
479+ # Small file - read from beginning
480+ with open (filename , "rb" ) as f :
481+ current_line = 0
482+ current_offset = 0
483+
484+ for line_bytes in f :
485+ current_line += 1
486+ if current_line == target_line :
487+ return current_offset
488+ current_offset += len (line_bytes )
489+
490+ # Target line beyond EOF
491+ return - 1
492+ except (IOError , OSError ) as e :
493+ logger .error (f"Failed to process file { filename } : { e } " )
494+ return - 1
495+
496+
497+ def calculate_exact_line_for_offset (filename : str , target_offset : int , index_data : dict | None = None ) -> int :
498+ """Calculate the exact line number for a given byte offset.
499+
500+ Args:
501+ filename: Path to the file
502+ target_offset: Byte offset to find line number for
503+ index_data: Optional index data. If None, will try to load or calculate
504+
505+ Returns:
506+ Line number (1-based) at the offset, or -1 if cannot determine
507+ """
508+ # If no index provided, try to load it
509+ if index_data is None :
510+ index_path = get_index_path (filename )
511+ index_data = load_index (index_path )
512+
513+ # If we have an index, use it
514+ if index_data :
515+ line_index = index_data .get ("line_index" , [])
516+ if not line_index :
517+ return - 1
518+
519+ # Find closest indexed line before target offset
520+ # Binary search by offset
521+ offsets = [entry [1 ] for entry in line_index ]
522+ idx = bisect .bisect_right (offsets , target_offset ) - 1
523+ if idx < 0 :
524+ idx = 0
525+
526+ indexed_line , indexed_offset = line_index [idx ]
527+
528+ # If exact match, return it
529+ if indexed_offset == target_offset :
530+ return indexed_line
531+
532+ # Read from indexed position and count lines to target offset
533+ # Sequential reading is fast due to OS buffering and disk read-ahead
534+ try :
535+ with open (filename , "rb" ) as f :
536+ f .seek (indexed_offset )
537+ current_line = indexed_line
538+ current_offset = indexed_offset
539+
540+ for line_bytes in f :
541+ if current_offset == target_offset :
542+ return current_line
543+ if current_offset + len (line_bytes ) > target_offset :
544+ # Target offset is within this line
545+ return current_line
546+ current_offset += len (line_bytes )
547+ current_line += 1
548+
549+ # Reached EOF
550+ return - 1
551+ except (IOError , OSError ) as e :
552+ logger .error (f"Failed to read file { filename } : { e } " )
553+ return - 1
554+
555+ # No index - check if file is small enough to read
556+ try :
557+ file_size = os .path .getsize (filename )
558+ threshold = get_large_file_threshold_bytes ()
559+
560+ if file_size > threshold :
561+ # Large file without index - cannot determine
562+ return - 1
563+
564+ # Small file - read from beginning
565+ with open (filename , "rb" ) as f :
566+ current_line = 0
567+ current_offset = 0
568+
569+ for line_bytes in f :
570+ current_line += 1
571+ if current_offset == target_offset :
572+ return current_line
573+ if current_offset + len (line_bytes ) > target_offset :
574+ # Target offset is within this line
575+ return current_line
576+ current_offset += len (line_bytes )
577+
578+ # EOF
579+ return - 1
580+ except (IOError , OSError ) as e :
581+ logger .error (f"Failed to process file { filename } : { e } " )
582+ return - 1
583+
584+
421585def get_index_info (source_path : str ) -> dict | None :
422586 """Get information about an existing index.
423587
0 commit comments