Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to grab YouTube video transcript with timestamps #1289

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Add the ability to grab YouTube video transcript with timestamps
This commit adds the ability to grab the transcript
of a YouTube video with timestamps. The timestamps
are formatted as HH:MM:SS and are prepended to
each line of the transcript. The feature is enabled
by the new `--transcript-with-timestamps` flag,
so it's similar to the existing `--transcript` flag.

Example future use-case:

Providing summary of a video that includes timestamps
for quick navigation to specific parts of the video.
  • Loading branch information
thevops committed Feb 7, 2025
commit f3a1982e30211da69fdbc289aa2c827cdc07b3bc
12 changes: 9 additions & 3 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func Cli(version string) (err error) {
func processYoutubeVideo(
flags *Flags, registry *core.PluginRegistry, videoId string) (message string, err error) {

if (!flags.YouTubeComments && !flags.YouTubeMetadata) || flags.YouTubeTranscript {
if (!flags.YouTubeComments && !flags.YouTubeMetadata) || flags.YouTubeTranscript || flags.YouTubeTranscriptWithTimestamps {
var transcript string
var language = "en"
if flags.Language != "" || registry.Language.DefaultLanguage.Value != "" {
Expand All @@ -297,8 +297,14 @@ func processYoutubeVideo(
language = registry.Language.DefaultLanguage.Value
}
}
if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil {
return
if flags.YouTubeTranscriptWithTimestamps {
if transcript, err = registry.YouTube.GrabTranscriptWithTimestamps(videoId, language); err != nil {
return
}
} else {
if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil {
return
}
}
message = AppendMessage(message, transcript)
}
Expand Down
1 change: 1 addition & 0 deletions cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type Flags struct {
YouTube string `short:"y" long:"youtube" description:"YouTube video or play list \"URL\" to grab transcript, comments from it and send to chat or print it put to the console and store it in the output file"`
YouTubePlaylist bool `long:"playlist" description:"Prefer playlist over video if both ids are present in the URL"`
YouTubeTranscript bool `long:"transcript" description:"Grab transcript from YouTube video and send to chat (it is used per default)."`
YouTubeTranscriptWithTimestamps bool `long:"transcript-with-timestamps" description:"Grab transcript from YouTube video with timestamps and send to chat"`
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
YouTubeMetadata bool `long:"metadata" description:"Output video metadata"`
Language string `short:"g" long:"language" description:"Specify the Language Code for the chat, e.g. -g=en -g=zh" default:""`
Expand Down
58 changes: 53 additions & 5 deletions plugins/tools/youtube/youtube.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,45 @@ func (o *YouTube) GrabTranscript(videoId string, language string) (ret string, e
return
}

func (o *YouTube) GrabTranscriptWithTimestamps(videoId string, language string) (ret string, err error) {
var transcript string
if transcript, err = o.GrabTranscriptBase(videoId, language); err != nil {
err = fmt.Errorf("transcript not available. (%v)", err)
return
}

// Parse the XML transcript
doc := soup.HTMLParse(transcript)
// Extract the text content from the <text> tags
textTags := doc.FindAll("text")
var textBuilder strings.Builder
for _, textTag := range textTags {
// Extract the start and duration attributes
start := textTag.Attrs()["start"]
dur := textTag.Attrs()["dur"]
end := fmt.Sprintf("%f", parseFloat(start)+parseFloat(dur))
// Format the timestamps
startFormatted := formatTimestamp(parseFloat(start))
endFormatted := formatTimestamp(parseFloat(end))
text := strings.ReplaceAll(textTag.Text(), "&#39;", "'")
textBuilder.WriteString(fmt.Sprintf("[%s - %s] %s\n", startFormatted, endFormatted, text))
}
ret = textBuilder.String()
return
}

func parseFloat(s string) float64 {
f, _ := strconv.ParseFloat(s, 64)
return f
}

func formatTimestamp(seconds float64) string {
hours := int(seconds) / 3600
minutes := (int(seconds) % 3600) / 60
secs := int(seconds) % 60
return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs)
}

func (o *YouTube) GrabTranscriptBase(videoId string, language string) (ret string, err error) {
if err = o.initService(); err != nil {
return
Expand Down Expand Up @@ -265,6 +304,13 @@ func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error)
return
}
}

if options.TranscriptWithTimestamps {
if ret.Transcript, err = o.GrabTranscriptWithTimestamps(videoId, "en"); err != nil {
return
}
}

return
}

Expand Down Expand Up @@ -372,11 +418,12 @@ type VideoMeta struct {
}

type Options struct {
Duration bool
Transcript bool
Comments bool
Lang string
Metadata bool
Duration bool
Transcript bool
TranscriptWithTimestamps bool
Comments bool
Lang string
Metadata bool
}

type VideoInfo struct {
Expand Down Expand Up @@ -437,6 +484,7 @@ func (o *YouTube) GrabByFlags() (ret *VideoInfo, err error) {
options := &Options{}
flag.BoolVar(&options.Duration, "duration", false, "Output only the duration")
flag.BoolVar(&options.Transcript, "transcript", false, "Output only the transcript")
flag.BoolVar(&options.TranscriptWithTimestamps, "transcriptWithTimestamps", false, "Output only the transcript with timestamps")
flag.BoolVar(&options.Comments, "comments", false, "Output the comments on the video")
flag.StringVar(&options.Lang, "lang", "en", "Language for the transcript (default: English)")
flag.BoolVar(&options.Metadata, "metadata", false, "Output video metadata")
Expand Down