Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for inline timestamps #102

Merged
merged 3 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ func (l Line) String() string {
// LineItem is a single piece of text belonging to a Line; the WebVTT text
// parser appends these to Line.Items.
type LineItem struct {
// InlineStyle carries the style attributes attached directly to this item.
// The WebVTT parser fills it from the currently open tag stack, and leaves it
// nil when no tag is open.
InlineStyle *StyleAttributes
// Style is a reference to a Style value.
// NOTE(review): presumably a named/shared style — confirm against the Style type.
Style *Style
// StartAt is the time parsed from an inline timestamp (e.g. <00:01:02.000>)
// preceding this item's text. It stays zero when the item has no inline
// timestamp; the WebVTT writer only emits a timestamp tag when StartAt > 0.
StartAt time.Duration
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
// Text is the item's text content. The WebVTT parser stores it trimmed and
// unescaped.
Text string
}

Expand Down
89 changes: 73 additions & 16 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ var (
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpTag = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
inlineTimestampRegexp = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
)
Expand Down Expand Up @@ -357,29 +358,80 @@ func parseTextWebVTT(i string) (o Line) {
}

case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}
sa.propagateWebVTTAttributes()
s := string(tr.Raw())
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
sa.propagateWebVTTAttributes()
}

// Append item
items := parseTextWebVTTText(sa, s)
o.Items = append(o.Items, items...)
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
}
}
return
}

func parseTextWebVTTText(sa *StyleAttributes, line string) []LineItem {
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
var ret []LineItem
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved

// split the line by inline timestamps
indexes := inlineTimestampRegexp.FindAllStringSubmatchIndex(line, -1)
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
if len(indexes) > 0 {
// get the text before the first timestamp
s := strings.TrimSpace(line[:indexes[0][0]])
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
if s != "" {
ret = append(ret, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
}

for i, match := range indexes {
// get the text between the timestamps
endIndex := len(line)
if i+1 < len(indexes) {
endIndex = indexes[i+1][0]
}
s := strings.TrimSpace(line[match[1]:endIndex])
asticode marked this conversation as resolved.
Show resolved Hide resolved
if s == "" {
continue
}

// get the timestamp
ts := line[match[2]:match[3]]
asticode marked this conversation as resolved.
Show resolved Hide resolved

// Parse timestamp
t, err := parseDurationWebVTT(ts)
if err != nil {
log.Printf("astisub: parsing webvtt duration %s failed, ignoring: %v", ts, err)
}

ret = append(ret, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
StartAt: t,
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
})
}
} else {
s := strings.TrimSpace(line)
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
if s != "" {
ret = append(ret, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
}
}

return ret
}

// formatDurationWebVTT formats a .vtt duration
func formatDurationWebVTT(i time.Duration) string {
return formatDuration(i, ".", 3)
Expand Down Expand Up @@ -559,6 +611,11 @@ func (l Line) webVTTBytes() (c []byte) {
}

func (li LineItem) webVTTBytes() (c []byte) {
// Add timestamp
if li.StartAt > 0 {
c = append(c, []byte("<"+formatDurationWebVTT(li.StartAt)+">")...)
}

// Get color
var color string
if li.InlineStyle != nil && li.InlineStyle.TTMLColor != nil {
Expand Down
29 changes: 29 additions & 0 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,35 @@ func TestParseTextWebVTT(t *testing.T) {
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
})

t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
assert.Equal(t, "timestamps", s.Items[1].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[1].StartAt)
})

t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
})

t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
})
}

func TestTimestampMap(t *testing.T) {
Expand Down
11 changes: 9 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,15 @@ func TestWebVTTTags(t *testing.T) {
<customed_tag.class1.class2>Text here</customed_tag>

00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something</v> <v Bob>Bob says something</v>`
<v Joe>Joe says something</v> <v Bob>Bob says something</v>

00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 5)
require.Len(t, s.Items, 6)

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand All @@ -210,5 +213,9 @@ func TestWebVTTTags(t *testing.T) {
5
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something Bob says something

6
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle
`, b.String())
}
Loading