From 2e06f1f71ee4823693eae04f6f8bd66cc9f0f87a Mon Sep 17 00:00:00 2001 From: Julian Kornberger Date: Sun, 24 Dec 2023 12:18:32 +0100 Subject: [PATCH] Rewrite format filters and allow filtering by language --- client_test.go | 24 ++++ cmd/youtubedr/download.go | 5 +- cmd/youtubedr/downloader.go | 57 ++++---- cmd/youtubedr/info.go | 4 + cmd/youtubedr/url.go | 3 +- downloader/downloader.go | 30 ++-- downloader/downloader_hq_test.go | 3 +- downloader/downloader_test.go | 6 +- errors_test.go | 2 + format_list.go | 81 +++++------ format_list_test.go | 240 ++++++++++++------------------- response_data.go | 7 + video_test.go | 21 ++- 13 files changed, 214 insertions(+), 269 deletions(-) diff --git a/client_test.go b/client_test.go index eef56a92..3c9c121a 100644 --- a/client_test.go +++ b/client_test.go @@ -156,6 +156,30 @@ func TestGetVideoWithManifestURL(t *testing.T) { assert.NotZero(size) } +func TestGetVideo_MultiLanguage(t *testing.T) { + assert, require := assert.New(t), require.New(t) + video, err := testClient.GetVideo("https://www.youtube.com/watch?v=pU9sHwNKc2c") + require.NoError(err) + require.NotNil(video) + + // collect languages + var languageNames, lanaguageIDs []string + for _, format := range video.Formats { + if format.AudioTrack != nil { + languageNames = append(languageNames, format.LanguageDisplayName()) + lanaguageIDs = append(lanaguageIDs, format.AudioTrack.ID) + } + } + + assert.Contains(languageNames, "English original") + assert.Contains(languageNames, "Portuguese (Brazil)") + assert.Contains(lanaguageIDs, "en.4") + assert.Contains(lanaguageIDs, "pt-BR.3") + + assert.Empty(video.Formats.Language("Does not exist")) + assert.NotEmpty(video.Formats.Language("English original")) +} + func TestGetStream(t *testing.T) { assert, require := assert.New(t), require.New(t) diff --git a/cmd/youtubedr/download.go b/cmd/youtubedr/download.go index 93aeb49a..e2b98b35 100644 --- a/cmd/youtubedr/download.go +++ b/cmd/youtubedr/download.go @@ -32,8 +32,7 @@ func 
init() { downloadCmd.Flags().StringVarP(&outputFile, "filename", "o", "", "The output file, the default is genated by the video title.") downloadCmd.Flags().StringVarP(&outputDir, "directory", "d", ".", "The output directory.") - addQualityFlag(downloadCmd.Flags()) - addMimeTypeFlag(downloadCmd.Flags()) + addVideoSelectionFlags(downloadCmd.Flags()) } func download(id string) error { @@ -48,7 +47,7 @@ func download(id string) error { if err := checkFFMPEG(); err != nil { return err } - return downloader.DownloadComposite(context.Background(), outputFile, video, outputQuality, mimetype) + return downloader.DownloadComposite(context.Background(), outputFile, video, outputQuality, mimetype, language) } return downloader.Download(context.Background(), video, format, outputFile) diff --git a/cmd/youtubedr/downloader.go b/cmd/youtubedr/downloader.go index 697f8280..fa4c37ff 100644 --- a/cmd/youtubedr/downloader.go +++ b/cmd/youtubedr/downloader.go @@ -2,7 +2,6 @@ package main import ( "crypto/tls" - "errors" "fmt" "net" "net/http" @@ -20,16 +19,15 @@ import ( var ( insecureSkipVerify bool // skip TLS server validation outputQuality string // itag number or quality string - mimetype string // mimetype + mimetype string + language string downloader *ytdl.Downloader ) -func addQualityFlag(flagSet *pflag.FlagSet) { +func addVideoSelectionFlags(flagSet *pflag.FlagSet) { flagSet.StringVarP(&outputQuality, "quality", "q", "medium", "The itag number or quality label (hd720, medium)") -} - -func addMimeTypeFlag(flagSet *pflag.FlagSet) { - flagSet.StringVarP(&mimetype, "mimetype", "m", "mp4", "Mime-Type to filter (mp4, webm, av01, avc1) - applicable if --quality used is quality label") + flagSet.StringVarP(&mimetype, "mimetype", "m", "", "Mime-Type to filter (mp4, webm, av01, avc1) - applicable if --quality used is quality label") + flagSet.StringVarP(&language, "language", "l", "", "Language to filter") } func getDownloader() *ytdl.Downloader { @@ -70,41 +68,34 @@ func 
getDownloader() *ytdl.Downloader { return downloader } -func getVideoWithFormat(id string) (*youtube.Video, *youtube.Format, error) { +func getVideoWithFormat(videoID string) (*youtube.Video, *youtube.Format, error) { dl := getDownloader() - video, err := dl.GetVideo(id) + video, err := dl.GetVideo(videoID) if err != nil { return nil, nil, err } + + itag, _ := strconv.Atoi(outputQuality) formats := video.Formats + + if language != "" { + formats = formats.Language(language) + } if mimetype != "" { formats = formats.Type(mimetype) } - if len(formats) == 0 { - return nil, nil, errors.New("no formats found") + if outputQuality != "" { + formats = formats.Quality(outputQuality) } - - var format *youtube.Format - itag, _ := strconv.Atoi(outputQuality) - switch { - case itag > 0: - // When an itag is specified, do not filter format with mime-type - format = video.Formats.FindByItag(itag) - if format == nil { - return nil, nil, fmt.Errorf("unable to find format with itag %d", itag) - } - - case outputQuality != "": - format = formats.FindByQuality(outputQuality) - if format == nil { - return nil, nil, fmt.Errorf("unable to find format with quality %s", outputQuality) - } - - default: - // select the first format - formats.Sort() - format = &formats[0] + if itag > 0 { + formats = formats.Itag(itag) + } + if formats == nil { + return nil, nil, fmt.Errorf("unable to find the specified format") } - return video, format, nil + formats.Sort() + + // select the first format + return video, &formats[0], nil } diff --git a/cmd/youtubedr/info.go b/cmd/youtubedr/info.go index 171fa0b7..0bf83608 100644 --- a/cmd/youtubedr/info.go +++ b/cmd/youtubedr/info.go @@ -18,6 +18,7 @@ type VideoFormat struct { VideoQuality string AudioQuality string AudioChannels int + Language string Size int64 Bitrate int MimeType string @@ -73,6 +74,7 @@ var infoCmd = &cobra.Command{ Size: size, Bitrate: bitrate, MimeType: format.MimeType, + Language: format.LanguageDisplayName(), }) } @@ -102,6 +104,7 @@ 
func writeInfoOutput(w io.Writer, info *VideoInfo) { "size [MB]", "bitrate", "MimeType", + "language", }) for _, format := range info.Formats { @@ -114,6 +117,7 @@ func writeInfoOutput(w io.Writer, info *VideoInfo) { fmt.Sprintf("%0.1f", float64(format.Size)/1024/1024), strconv.Itoa(format.Bitrate), format.MimeType, + format.Language, }) } diff --git a/cmd/youtubedr/url.go b/cmd/youtubedr/url.go index 7afb698c..3343b98f 100644 --- a/cmd/youtubedr/url.go +++ b/cmd/youtubedr/url.go @@ -23,7 +23,6 @@ var urlCmd = &cobra.Command{ } func init() { - addQualityFlag(urlCmd.Flags()) - addMimeTypeFlag(urlCmd.Flags()) + addVideoSelectionFlags(urlCmd.Flags()) rootCmd.AddCommand(urlCmd) } diff --git a/downloader/downloader.go b/downloader/downloader.go index 59491600..b2a61dc2 100644 --- a/downloader/downloader.go +++ b/downloader/downloader.go @@ -2,7 +2,7 @@ package downloader import ( "context" - "fmt" + "errors" "io" "os" "os/exec" @@ -59,8 +59,8 @@ func (dl *Downloader) Download(ctx context.Context, v *youtube.Video, format *yo } // DownloadComposite : Downloads audio and video streams separately and merges them via ffmpeg. 
-func (dl *Downloader) DownloadComposite(ctx context.Context, outputFile string, v *youtube.Video, quality string, mimetype string) error { - videoFormat, audioFormat, err1 := getVideoAudioFormats(v, quality, mimetype) +func (dl *Downloader) DownloadComposite(ctx context.Context, outputFile string, v *youtube.Video, quality string, mimetype, language string) error { + videoFormat, audioFormat, err1 := getVideoAudioFormats(v, quality, mimetype, language) if err1 != nil { return err1 } @@ -122,8 +122,7 @@ func (dl *Downloader) DownloadComposite(ctx context.Context, outputFile string, return ffmpegVersionCmd.Run() } -func getVideoAudioFormats(v *youtube.Video, quality string, mimetype string) (*youtube.Format, *youtube.Format, error) { - var videoFormat, audioFormat *youtube.Format +func getVideoAudioFormats(v *youtube.Video, quality string, mimetype, language string) (*youtube.Format, *youtube.Format, error) { var videoFormats, audioFormats youtube.FormatList formats := v.Formats @@ -138,25 +137,22 @@ func getVideoAudioFormats(v *youtube.Video, quality string, mimetype string) (*y videoFormats = videoFormats.Quality(quality) } - if len(videoFormats) > 0 { - videoFormats.Sort() - videoFormat = &videoFormats[0] + if language != "" { + audioFormats = audioFormats.Language(language) } - if len(audioFormats) > 0 { - audioFormats.Sort() - audioFormat = &audioFormats[0] + if len(videoFormats) == 0 { + return nil, nil, errors.New("no video format found after filtering") } - if videoFormat == nil { - return nil, nil, fmt.Errorf("no video format found after filtering") + if len(audioFormats) == 0 { + return nil, nil, errors.New("no audio format found after filtering") } - if audioFormat == nil { - return nil, nil, fmt.Errorf("no audio format found after filtering") - } + videoFormats.Sort() + audioFormats.Sort() - return videoFormat, audioFormat, nil + return &videoFormats[0], &audioFormats[0], nil } func (dl *Downloader) videoDLWorker(ctx context.Context, out *os.File, video 
*youtube.Video, format *youtube.Format) error { diff --git a/downloader/downloader_hq_test.go b/downloader/downloader_hq_test.go index 8a598e81..6cc0dd63 100644 --- a/downloader/downloader_hq_test.go +++ b/downloader/downloader_hq_test.go @@ -16,6 +16,5 @@ func TestDownload_HighQuality(t *testing.T) { video, err := testDownloader.Client.GetVideoContext(ctx, "BaW_jenozKc") require.NoError(err) - - require.NoError(testDownloader.DownloadComposite(ctx, "", video, "hd1080", "mp4")) + require.NoError(testDownloader.DownloadComposite(ctx, "", video, "hd1080", "mp4", "")) } diff --git a/downloader/downloader_test.go b/downloader/downloader_test.go index 367e897f..01942305 100644 --- a/downloader/downloader_test.go +++ b/downloader/downloader_test.go @@ -69,7 +69,7 @@ func TestYoutube_DownloadWithHighQualityFails(t *testing.T) { Formats: tt.formats, } - err := testDownloader.DownloadComposite(context.Background(), "", video, "hd1080", "") + err := testDownloader.DownloadComposite(context.Background(), "", video, "hd1080", "", "") assert.EqualError(t, err, tt.message) }) } @@ -101,7 +101,7 @@ func Test_getVideoAudioFormats(t *testing.T) { {ItagNo: 249, MimeType: "audio/webm; codecs=\"opus\"", Quality: "tiny", Bitrate: 72862, FPS: 0, Width: 0, Height: 0, LastModified: "1540474783513282", ContentLength: 24839529, QualityLabel: "", ProjectionType: "RECTANGULAR", AverageBitrate: 55914, AudioQuality: "AUDIO_QUALITY_LOW", ApproxDurationMs: "3553941", AudioSampleRate: "48000", AudioChannels: 2}, }} { - videoFormat, audioFormat, err := getVideoAudioFormats(v, "hd720", "mp4") + videoFormat, audioFormat, err := getVideoAudioFormats(v, "hd720", "mp4", "") require.NoError(err) require.NotNil(videoFormat) require.Equal(398, videoFormat.ItagNo) @@ -110,7 +110,7 @@ func Test_getVideoAudioFormats(t *testing.T) { } { - videoFormat, audioFormat, err := getVideoAudioFormats(v, "large", "webm") + videoFormat, audioFormat, err := getVideoAudioFormats(v, "large", "webm", "") require.NoError(err) 
require.NotNil(videoFormat) require.Equal(244, videoFormat.ItagNo) diff --git a/errors_test.go b/errors_test.go index b3b68c34..f1d8beb9 100644 --- a/errors_test.go +++ b/errors_test.go @@ -8,6 +8,8 @@ import ( ) func TestErrors(t *testing.T) { + t.Parallel() + tests := []struct { err error expected string diff --git a/format_list.go b/format_list.go index d7209056..57c98dc0 100644 --- a/format_list.go +++ b/format_list.go @@ -8,68 +8,59 @@ import ( type FormatList []Format -// FindByQuality returns the first format matching Quality or QualityLabel -// -// Examples: tiny, small, medium, large, 720p, hd720, hd1080 -func (list FormatList) FindByQuality(quality string) *Format { +// Select returns a new FormatList containing the formats for which f returns true +func (list FormatList) Select(f func(Format) bool) (result FormatList) { for i := range list { - if list[i].Quality == quality || list[i].QualityLabel == quality { - return &list[i] + if f(list[i]) { + result = append(result, list[i]) } } - return nil + return result } -// FindByItag returns the first format matching the itag number -func (list FormatList) FindByItag(itagNo int) *Format { - for i := range list { - if list[i].ItagNo == itagNo { - return &list[i] - } - } - return nil +// Itag returns a new FormatList filtered by itag number +func (list FormatList) Itag(itagNo int) FormatList { + return list.Select(func(f Format) bool { + return f.ItagNo == itagNo + }) } -// Type returns a new FormatList filtered by mime type of video -func (list FormatList) Type(t string) (result FormatList) { - for i := range list { - if strings.Contains(list[i].MimeType, t) { - result = append(result, list[i]) - } - } - return result +// Type returns a new FormatList filtered by mime type +func (list FormatList) Type(value string) FormatList { + return list.Select(func(f Format) bool { + return strings.Contains(f.MimeType, value) + }) +} + +// Language returns a new FormatList filtered by language display name +func (list FormatList) Language(displayName string) FormatList { + 
return list.Select(func(f Format) bool { + return f.LanguageDisplayName() == displayName + }) } // Quality returns a new FormatList filtered by quality, quality label or itag, // but not audio quality -func (list FormatList) Quality(quality string) (result FormatList) { - for _, f := range list { - itag, _ := strconv.Atoi(quality) - if itag == f.ItagNo || strings.Contains(f.Quality, quality) || strings.Contains(f.QualityLabel, quality) { - result = append(result, f) - } - } - return result +func (list FormatList) Quality(quality string) FormatList { + itag, _ := strconv.Atoi(quality) + + return list.Select(func(f Format) bool { + return itag == f.ItagNo || strings.Contains(f.Quality, quality) || strings.Contains(f.QualityLabel, quality) + }) } // AudioChannels returns a new FormatList filtered by the matching AudioChannels -func (list FormatList) AudioChannels(n int) (result FormatList) { - for _, f := range list { - if f.AudioChannels == n { - result = append(result, f) - } - } - return result +func (list FormatList) AudioChannels(n int) FormatList { + return list.Select(func(f Format) bool { + return f.AudioChannels == n + }) } // AudioChannels returns a new FormatList filtered by the matching AudioChannels -func (list FormatList) WithAudioChannels() (result FormatList) { - for _, f := range list { - if f.AudioChannels > 0 { - result = append(result, f) - } - } - return result +func (list FormatList) WithAudioChannels() FormatList { + return list.Select(func(f Format) bool { + return f.AudioChannels > 0 + }) } // FilterQuality reduces the format list to formats matching the quality diff --git a/format_list_test.go b/format_list_test.go index b134ac89..2a5e0b0a 100644 --- a/format_list_test.go +++ b/format_list_test.go @@ -6,156 +6,130 @@ import ( "github.com/stretchr/testify/assert" ) -func TestFormatList_FindByQuality(t *testing.T) { - list := []Format{{ - ItagNo: 0, +type filter struct { + Quality string + ItagNo int + MimeType string + Language string + 
AudioChannels int +} + +func (list FormatList) Filter(filter filter) FormatList { + if filter.ItagNo > 0 { + list = list.Itag(filter.ItagNo) + } + if filter.AudioChannels > 0 { + list = list.AudioChannels(filter.AudioChannels) + } + if filter.Quality != "" { + list = list.Quality(filter.Quality) + } + if filter.MimeType != "" { + list = list.Type(filter.MimeType) + } + if filter.Language != "" { + list = list.Language(filter.Language) + } + return list +} + +func TestFormatList_Filter(t *testing.T) { + t.Parallel() + + format1 := Format{ + ItagNo: 1, Quality: "medium", QualityLabel: "360p", - }, - { - ItagNo: 1, - Quality: "large", - QualityLabel: "480p", - }, } - type args struct { - quality string + + format2 := Format{ + ItagNo: 2, + Quality: "large", + QualityLabel: "480p", + MimeType: `video/mp4; codecs="avc1.42001E, mp4a.40.2"`, + AudioChannels: 1, + } + + formatStereo := Format{ + ItagNo: 3, + URL: "stereo", + AudioChannels: 2, + } + + list := FormatList{ + format1, + format2, + formatStereo, } + tests := []struct { name string - list FormatList - args args - want *Format + args filter + want []Format }{ { - name: "find by quality, get correct one", - list: list, - args: args{ - quality: "medium", - }, - want: &Format{ - ItagNo: 0, - Quality: "medium", - QualityLabel: "360p", + name: "empty list with quality small", + args: filter{ + Quality: "small", }, }, { - name: "find by quality label, get correct one", - list: list, - args: args{ - quality: "480p", - }, - want: &Format{ - ItagNo: 1, - Quality: "large", - QualityLabel: "480p", + name: "empty list with other itagNo", + args: filter{ + ItagNo: 99, }, }, { - name: "find nothing with quality", - list: list, - args: args{ - quality: "small", + name: "empty list with other mimeType", + args: filter{ + MimeType: "other", }, - want: nil, }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - format := tt.list.FindByQuality(tt.args.quality) - assert.Equal(t, format, tt.want) - }) - } -} - 
-func TestFormatList_FindByItag(t *testing.T) { - list := []Format{{ - ItagNo: 18, - }, - { - ItagNo: 135, - }, - } - type args struct { - itagNo int - } - tests := []struct { - name string - list FormatList - args args - want *Format - }{ { - name: "find itag 18", - list: list, - args: args{ - itagNo: 18, - }, - want: &Format{ - ItagNo: 18, + name: "empty list with other audioChannels", + args: filter{ + AudioChannels: 7, }, }, { - name: "find itag 135", - list: list, - args: args{ - itagNo: 135, - }, - want: &Format{ - ItagNo: 135, + name: "audioChannels stereo", + args: filter{ + AudioChannels: formatStereo.AudioChannels, }, + want: []Format{formatStereo}, }, { - name: "find nothing", - list: list, - args: args{ - itagNo: 9999, + name: "find by medium quality", + args: filter{ + Quality: "medium", }, - want: nil, + want: []Format{format1}, }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - format := tt.list.FindByItag(tt.args.itagNo) - assert.Equal(t, format, tt.want) - }) - } -} - -func TestFormatList_Type(t *testing.T) { - list := []Format{{ - MimeType: "video/mp4; codecs=\"avc1.42001E, mp4a.40.2\"", - }, - } - type args struct { - mimeType string - } - tests := []struct { - name string - list FormatList - args args - want FormatList - }{ { - name: "find video", - list: list, - args: args{ - mimeType: "video/mp4; codecs=\"avc1.42001E, mp4a.40.2\"", + name: "find by 480p", + args: filter{ + Quality: "480p", }, - want: []Format{{ - MimeType: "video/mp4; codecs=\"avc1.42001E, mp4a.40.2\"", - }}, + want: []Format{format2}, }, } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - format := tt.list.Type("video") - assert.Equal(t, format, tt.want) + formats := list.Filter(tt.args) + + if tt.want == nil { + assert.Empty(t, formats) + } else { + assert.Equal(t, tt.want, []Format(formats)) + } }) } } func TestFormatList_Sort(t *testing.T) { + t.Parallel() + list := FormatList{ {Width: 512}, {Width: 768, MimeType: "mp4"}, @@ 
-170,41 +144,3 @@ func TestFormatList_Sort(t *testing.T) { {Width: 512}, }, list) } - -func TestFormatList_WithAudioChannels(t *testing.T) { - list := []Format{ - { - AudioChannels: 0, - Quality: "medium", - QualityLabel: "360p", - }, - { - AudioChannels: 1, - Quality: "large", - QualityLabel: "480p", - }, - } - tests := []struct { - name string - list FormatList - want FormatList - }{ - { - name: "find all formats with Audio Channels", - list: list, - want: []Format{ - { - AudioChannels: 1, - Quality: "large", - QualityLabel: "480p", - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - format := tt.list.WithAudioChannels() - assert.Equal(t, format, tt.want) - }) - } -} diff --git a/response_data.go b/response_data.go index 39ad2fb2..154a5728 100644 --- a/response_data.go +++ b/response_data.go @@ -126,6 +126,13 @@ type Format struct { } } +func (f *Format) LanguageDisplayName() string { + if f.AudioTrack == nil { + return "" + } + return f.AudioTrack.DisplayName +} + type Thumbnails []Thumbnail type Thumbnail struct { diff --git a/video_test.go b/video_test.go index 73c6276d..e2544506 100644 --- a/video_test.go +++ b/video_test.go @@ -16,8 +16,8 @@ func ExampleClient_GetStream() { // Typically youtube only provides separate streams for video and audio. // If you want audio and video combined, take a look a the downloader package. - format := video.Formats.FindByQuality("medium") - reader, _, err := testClient.GetStream(video, format) + formats := video.Formats.Quality("medium") + reader, _, err := testClient.GetStream(video, &formats[0]) if err != nil { panic(err) } @@ -28,7 +28,6 @@ func ExampleClient_GetStream() { } func TestSimpleTest(t *testing.T) { - video, err := testClient.GetVideo("https://www.youtube.com/watch?v=9_MbW9FK1fA") require.NoError(t, err, "get body") @@ -37,10 +36,11 @@ func TestSimpleTest(t *testing.T) { // Typically youtube only provides separate streams for video and audio. 
// If you want audio and video combined, take a look a the downloader package. - format := video.Formats.FindByQuality("hd1080") + formats := video.Formats.Quality("hd1080") + require.NotEmpty(t, formats) start := time.Now() - reader, _, err := testClient.GetStream(video, format) + reader, _, err := testClient.GetStream(video, &formats[0]) require.NoError(t, err, "get stream") t.Log("Duration Milliseconds: ", time.Since(start).Milliseconds()) @@ -53,7 +53,6 @@ func TestSimpleTest(t *testing.T) { } func TestDownload_Regular(t *testing.T) { - testcases := []struct { name string url string @@ -111,15 +110,13 @@ func TestDownload_Regular(t *testing.T) { video, err := testClient.GetVideo(tc.url) require.NoError(err) - var format *Format + formats := video.Formats if tc.itagNo > 0 { - format = video.Formats.FindByItag(tc.itagNo) - require.NotNil(format) - } else { - format = &video.Formats[0] + formats = formats.Itag(tc.itagNo) + require.NotEmpty(formats) } - url, err := testClient.GetStreamURL(video, format) + url, err := testClient.GetStreamURL(video, &video.Formats[0]) require.NoError(err) require.NotEmpty(url) })