Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 69 additions & 30 deletions cmd/entire/cli/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package cli

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
Expand All @@ -19,8 +21,8 @@ const (
contentTypeText = "text"
)

// parseTranscript reads and parses a Claude Code transcript file
// Uses a large buffer to handle very long lines (tool outputs can be huge)
// parseTranscript reads and parses a Claude Code transcript file.
// Uses bufio.Reader to handle arbitrarily long lines.
func parseTranscript(path string) ([]transcriptLine, error) {
file, err := os.Open(path) //nolint:gosec // Reading from controlled git metadata path
if err != nil {
Expand All @@ -29,25 +31,37 @@ func parseTranscript(path string) ([]transcriptLine, error) {
defer func() { _ = file.Close() }()

var lines []transcriptLine
scanner := bufio.NewScanner(file)
// Use large buffer for very long lines (transcript lines can be huge)
scanner.Buffer(make([]byte, 0, ScannerBufferSize), ScannerBufferSize)
reader := bufio.NewReader(file)

for {
lineBytes, err := reader.ReadBytes('\n')
if err != nil && err != io.EOF {
return nil, fmt.Errorf("failed to read transcript: %w", err)
}

// Handle empty line or EOF without content
if len(lineBytes) == 0 {
if err == io.EOF {
break
}
continue
}

for scanner.Scan() {
var line transcriptLine
if err := json.Unmarshal(scanner.Bytes(), &line); err != nil {
continue // Skip malformed lines
if err := json.Unmarshal(lineBytes, &line); err == nil {
lines = append(lines, line)
}
lines = append(lines, line)
}

if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to scan transcript: %w", err)
if err == io.EOF {
break
}
}

return lines, nil
}

// parseTranscriptFromLine reads and parses a transcript file starting from a specific line.
// Uses bufio.Reader to handle arbitrarily long lines.
// Returns:
// - lines: parsed transcript lines from startLine onwards (malformed lines skipped)
// - totalLines: total number of lines in the file (including malformed ones)
Expand All @@ -63,45 +77,70 @@ func parseTranscriptFromLine(path string, startLine int) ([]transcriptLine, int,
defer func() { _ = file.Close() }()

var lines []transcriptLine
scanner := bufio.NewScanner(file)
scanner.Buffer(make([]byte, 0, ScannerBufferSize), ScannerBufferSize)
reader := bufio.NewReader(file)

totalLines := 0
for scanner.Scan() {
for {
lineBytes, err := reader.ReadBytes('\n')
if err != nil && err != io.EOF {
return nil, 0, fmt.Errorf("failed to read transcript: %w", err)
}

// Handle empty line or EOF without content
if len(lineBytes) == 0 {
if err == io.EOF {
break
}
continue
}

// Count all lines for totalLines, but only parse after startLine
if totalLines >= startLine {
var line transcriptLine
if err := json.Unmarshal(scanner.Bytes(), &line); err == nil {
if err := json.Unmarshal(lineBytes, &line); err == nil {
lines = append(lines, line)
}
// Note: malformed lines are silently skipped (consistent with parseTranscript)
}
totalLines++
}

if err := scanner.Err(); err != nil {
return nil, 0, fmt.Errorf("failed to scan transcript: %w", err)
if err == io.EOF {
break
}
}

return lines, totalLines, nil
}

// parseTranscriptFromBytes parses transcript content from a byte slice
// parseTranscriptFromBytes parses transcript content from a byte slice.
// Uses bufio.Reader to handle arbitrarily long lines.
func parseTranscriptFromBytes(content []byte) ([]transcriptLine, error) {
var lines []transcriptLine
scanner := bufio.NewScanner(strings.NewReader(string(content)))
scanner.Buffer(make([]byte, 0, ScannerBufferSize), ScannerBufferSize)
reader := bufio.NewReader(bytes.NewReader(content))

for {
lineBytes, err := reader.ReadBytes('\n')
if err != nil && err != io.EOF {
return nil, fmt.Errorf("failed to read transcript: %w", err)
}

// Handle empty line or EOF without content
if len(lineBytes) == 0 {
if err == io.EOF {
break
}
continue
}

for scanner.Scan() {
var line transcriptLine
if err := json.Unmarshal(scanner.Bytes(), &line); err != nil {
continue // Skip malformed lines
if err := json.Unmarshal(lineBytes, &line); err == nil {
lines = append(lines, line)
}
lines = append(lines, line)
}

if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to scan transcript: %w", err)
if err == io.EOF {
break
}
}

return lines, nil
}

Expand Down
39 changes: 39 additions & 0 deletions cmd/entire/cli/transcript_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,45 @@ func TestParseTranscript_LargeLines(t *testing.T) {
}
}

// TestParseTranscript_LineExceedsScannerBuffer verifies that parseTranscript
// accepts a single transcript record longer than the 10MB ScannerBufferSize
// limit, as happens when a tool output embeds a very large file.
// A line reader capped at that size (bufio.Scanner) would reject such a
// record with "token too long"; this test guards against that regression.
func TestParseTranscript_LineExceedsScannerBuffer(t *testing.T) {
	// 11MB of filler pushes the one-line record past the 10MB limit.
	const payloadSize = 11 * 1024 * 1024
	record := `{"type":"user","uuid":"user-1","message":{"content":"` +
		strings.Repeat("x", payloadSize) +
		`"}}`

	path := createTempTranscript(t, record)

	parsed, err := parseTranscript(path)
	if err != nil {
		t.Fatalf("unexpected error parsing line exceeding buffer: %v", err)
	}

	// The oversized record must come back as exactly one parsed line.
	if got := len(parsed); got != 1 {
		t.Fatalf("expected 1 line, got %d", got)
	}
}

// TestParseTranscriptFromLine_LineExceedsScannerBuffer is the
// parseTranscriptFromLine counterpart of the oversized-line test: a single
// record larger than 10MB, read from line 0, must be both counted and parsed.
func TestParseTranscriptFromLine_LineExceedsScannerBuffer(t *testing.T) {
	// 11MB of filler in one JSON record on one line.
	const payloadSize = 11 * 1024 * 1024
	record := `{"type":"user","uuid":"user-1","message":{"content":"` +
		strings.Repeat("x", payloadSize) +
		`"}}`

	path := createTempTranscript(t, record)

	parsed, total, err := parseTranscriptFromLine(path, 0)
	if err != nil {
		t.Fatalf("unexpected error parsing line exceeding buffer: %v", err)
	}

	// The line count is reported with Errorf so the parsed-line check below
	// still runs when the count is wrong.
	if total != 1 {
		t.Errorf("expected totalLines=1, got %d", total)
	}
	if got := len(parsed); got != 1 {
		t.Fatalf("expected 1 parsed line, got %d", got)
	}
}

func TestExtractLastUserPrompt_StringContent(t *testing.T) {
transcript := []transcriptLine{
{Type: "user", UUID: "u1", Message: []byte(`{"content":"First prompt"}`)},
Expand Down