Skip to content

Commit

Permalink
[fix/ISSUE-83] Add support for downloading HLS streams
Browse files Browse the repository at this point in the history
  • Loading branch information
azihassan committed Sep 5, 2024
1 parent 81a1d29 commit a878567
Show file tree
Hide file tree
Showing 6 changed files with 234 additions and 43 deletions.
9 changes: 7 additions & 2 deletions source/app.d
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import std.stdio : writef, stdout, writeln;
import std.algorithm : each;
import std.conv : to;
import std.string : format;
import std.string : format, endsWith;
import std.file : getcwd, write, getSize;
import std.net.curl : get;
import std.path : buildPath;
Expand Down Expand Up @@ -157,7 +157,12 @@ void handleURL(string url, int itag, StdoutLogger logger, bool displayFormats, b
logger.display("Downloading ", url, " to ", filename);

Downloader downloader;
if(parallel)
if(link.endsWith(".m3u8"))
{
logger.display("Using M3u8Downloader");
downloader = new M3u8Downloader(logger, youtubeFormat, !noProgress);
}
else if(parallel)
{
logger.display("Using ParallelDownloader");
downloader = new ParallelDownloader(logger, parser.getID(), parser.getTitle(), youtubeFormat, !noProgress);
Expand Down
1 change: 1 addition & 0 deletions source/cache.d
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct Cache
curl.set(CurlOption.url, url);
curl.set(CurlOption.encoding, "deflate, gzip");
curl.set(CurlOption.followlocation, true);
curl.set(CurlOption.useragent, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)");

curl.onReceive = (ubyte[] chunk) {
result ~= chunk.map!(to!(const(char))).to!string;
Expand Down
86 changes: 82 additions & 4 deletions source/downloaders.d
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import std.stdio : writef, writeln, File;
import std.parallelism : defaultPoolThreads, taskPool, totalCPUs;
import std.algorithm : each, sort, sum, map, min;
import std.algorithm : each, sort, sum, map, min, filter;
import std.conv : to;
import std.string : startsWith, indexOf, format, split;
import std.string : startsWith, indexOf, format, split, lineSplitter;
import std.file : append, exists, read, remove, getSize;
import std.range : iota;
import std.net.curl : Curl, CurlOption, HTTP;
import std.net.curl : Curl, CurlOption, HTTP, get;
import std.math : ceil;
import helpers : getContentLength, sanitizePath, StdoutLogger, formatSuccess, formatTitle;

Expand Down Expand Up @@ -59,7 +59,8 @@ class RegularDownloader : Downloader

auto file = File(destination, "ab");
curl.set(CurlOption.url, url);
curl.set(CurlOption.useragent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0");
//curl.set(CurlOption.useragent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0");
curl.set(CurlOption.useragent, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)");
curl.set(CurlOption.referer, referer);
curl.set(CurlOption.followlocation, true);
curl.set(CurlOption.failonerror, true);
Expand Down Expand Up @@ -149,6 +150,7 @@ class ParallelDownloader : Downloader

//request range length limit above which youtube starts throttling downloads
//https://github.com/azihassan/youtube-d/issues/65#issuecomment-2094993192

public immutable LENGTH_THROTTLING_LIMIT = 10.0 * 1024.0 * 1024.0;

this(StdoutLogger logger, string id, string title, YoutubeFormat youtubeFormat, bool progress = true)
Expand Down Expand Up @@ -274,3 +276,79 @@ class ChunkedDownloader : ParallelDownloader
}
}

//https://rr3---sn-p5h-jhoy.googlevideo.com/videoplayback/id/c303be7a57ea6f28/itag/91/source/youtube/expire/1724813705/ei/KT3OZryRFbDCmLAP0ZW0sA4/ip/102.49.55.161/requiressl/yes/ratebypass/yes/pfa/1/sgoap/clen%3D4659962%3Bdur%3D764.075%3Bgir%3Dyes%3Bitag%3D139%3Blmt%3D1724731671440441/sgovp/clen%3D8272591%3Bdur%3D763.929%3Bgir%3Dyes%3Bitag%3D160%3Blmt%3D1724731663052618/rqh/1/hls_chunk_host/rr3---sn-p5h-jhoy.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/mh/vz/mm/31,29/mn/sn-p5h-jhoy,sn-apn7en7e/ms/au,rdu/mv/m/mvi/3/pl/17/initcwndbps/391250/spc/Mv1m9jMugnpWvFVyQljKM1pZQINUP9nMutRavY8GPMP3mBAHQ9x5lGAO2u3KZYw/vprv/1/playlist_type/CLEAN/txp/6309224/mt/1724791751/fvip/3/keepalive/yes/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,pfa,sgoap,sgovp,rqh,xpc,spc,vprv,playlist_type/sig/AJfQdSswRQIgS6erfF7F7NN8ScQJC33JIBqa3FkM9Gk7lNq0gd64a-MCIQDOBo0dLp_vVWa1lvNHVBc8mstehsyJPV3qs1bpOzS8Wg%3D%3D/lsparams/hls_chunk_host,mh,mm,mn,ms,mv,mvi,pl,initcwndbps/lsig/AGtxev0wRgIhAOk3G0guZupTB9f04t3hunhQ0zZqIT2gwXLdsywduCIeAiEA9Fz-TI3Ix8dHL9eplJ4nu7NHnSi4o4TRYBBjRgpkJss%3D/playlist/index.m3u8/govp/slices%3D0-44728/goap/slices%3D0-62899/begin/0/len/5005/gosq/0/file/seg.ts
/**
 * Downloader for HLS streams.
 *
 * Fetches the playlist behind the given URL and appends every segment it
 * lists, in playlist order, to the destination file.
 */
class M3u8Downloader : Downloader
{
    private StdoutLogger logger;
    private int delegate(ulong length, ulong currentLength) onProgress;
    private bool progress;
    private YoutubeFormat youtubeFormat;

    /**
     * Params:
     *   logger = receives the diagnostic messages of the download
     *   youtubeFormat = format being downloaded; its length is compared against the
     *                   destination size for the progress display and the completion check
     *   progress = whether a progress callback is installed on each segment transfer
     */
    this(StdoutLogger logger, YoutubeFormat youtubeFormat, bool progress = true)
    {
        this.logger = logger;
        this.youtubeFormat = youtubeFormat;
        this.progress = progress;
    }

    /**
     * Downloads every segment of the playlist at `url` into `destination`.
     *
     * Params:
     *   destination = path of the file the segments are appended to
     *   url = URL of the m3u8 playlist listing the segments
     *   referer = value sent as the Referer of each segment request
     */
    override public void download(string destination, string url, string referer)
    {
        logger.display("Length = ", youtubeFormat.length);
        logger.display("progress = ", progress);
        logger.display("youtubeFormat = ", youtubeFormat);

        //checked once up front instead of before every segment
        //a length of 0 means the size is unknown, so an empty file must not count as complete
        if(youtubeFormat.length != 0 && destination.exists() && destination.getSize() == youtubeFormat.length)
        {
            logger.display("Done !".formatSuccess());
            return;
        }

        //curl reports per-segment totals, so progress is derived from the size of the file on disk instead
        this.onProgress = (ulong _, ulong __) {
            if(youtubeFormat.length == 0)
            {
                logger.display("youtubeFormat.length == 0");
                return 0;
            }
            ulong current = destination.getSize();
            auto percentage = 100.0 * (cast(float)(current) / youtubeFormat.length);
            writef!"\r[%.2f %%] %.2f / %.2f MB"(percentage, current / 1024.0 / 1024.0, youtubeFormat.length / 1024.0 / 1024.0);
            return 0;
        };

        string playlist = url.get().idup;

        //lines starting with '#' are tags or comments, everything else is a segment URL
        //blank lines are dropped first so that line[0] is never read on an empty line
        auto segments = playlist
            .lineSplitter
            .filter!(line => line.length != 0 && line[0] != '#');

        //NOTE(review): segments are always appended, so running this over a partially
        //downloaded destination adds the playlist again after the existing bytes
        foreach(segment; segments)
        {
            downloadSegment(destination, segment, referer);
        }
    }

    /**
     * Downloads a single segment and appends its bytes to `destination`.
     *
     * Params:
     *   destination = path of the file the segment is appended to
     *   url = URL of the segment
     *   referer = value sent as the Referer of the request
     */
    private void downloadSegment(string destination, string url, string referer)
    {
        auto http = HTTP(url);

        //http.verbose(logger.verbose);

        auto curl = http.handle();

        auto file = File(destination, "ab");
        curl.set(CurlOption.url, url);
        curl.set(CurlOption.useragent, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)");
        curl.set(CurlOption.referer, referer);
        curl.set(CurlOption.followlocation, true);
        curl.set(CurlOption.failonerror, true);
        curl.set(CurlOption.connecttimeout, 60 * 3);
        curl.set(CurlOption.nosignal, true);

        curl.onReceive = (ubyte[] data) {
            file.rawWrite(data);
            return data.length;
        };

        if(progress)
        {
            curl.onProgress = (size_t total, size_t current, size_t _, size_t __) {
                return onProgress(total, current);
            };
        }
        curl.perform();
    }
}
75 changes: 63 additions & 12 deletions source/helpers.d
Original file line number Diff line number Diff line change
@@ -1,44 +1,38 @@
import std.logger;
import std.stdio : writeln, writefln, File, stdout;
import std.regex : ctRegex, matchFirst, escaper, regex, Captures;
import std.algorithm : filter;
import std.regex : ctRegex, matchAll, matchFirst, escaper, regex, Captures;
import std.algorithm : filter, map, canFind, sum;
import std.conv : to;
import std.net.curl : HTTP;
import std.string : split, indexOf, startsWith, endsWith;
import std.string : split, indexOf, startsWith, endsWith, strip;
import std.format : formattedRead;
import std.range : chunks;

import parsers : YoutubeFormat, AudioVisual;

/**
 * Determines the content length to use for the resource behind `url`.
 *
 * The first source that yields a value wins:
 * the length carried by `youtubeFormat` when it is not 0,
 * then the difference between the bounds of a closed `range` query parameter,
 * and finally the content-length header of a HEAD request.
 *
 * Params:
 *   url = URL of the resource
 *   youtubeFormat = format whose length is returned as is when it is not 0
 *
 * Returns: the length obtained from the first applicable source
 *
 * Throws: Exception when the HEAD request answers with a status of 400 or above,
 *         or when its response carries no content-length header
 */
ulong getContentLength(string url, YoutubeFormat youtubeFormat)
{
    if(youtubeFormat.length != 0)
    {
        return youtubeFormat.length;
    }

    string[string] queryString = url.parseQueryString();
    //an open-ended range ("123-") has no upper bound to compute a length from
    if("range" in queryString && !queryString["range"].endsWith("-"))
    {
        string[] limits = queryString["range"].split("-");
        return limits[1].to!ulong - limits[0].to!ulong;
    }

    auto http = HTTP(url);
    http.method = HTTP.Method.head;
    http.addRequestHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)");
    http.perform();
    if(http.statusLine.code >= 400)
    {
        throw new Exception("Failed with status " ~ http.statusLine.code.to!string);
    }

    //looked up with `in` so that a missing header surfaces as a catchable Exception instead of a RangeError
    string[string] headers = http.responseHeaders;
    if(auto contentLength = "content-length" in headers)
    {
        return (*contentLength).to!ulong;
    }
    throw new Exception("Response is missing the content-length header");
}

Expand Down Expand Up @@ -298,3 +292,60 @@ unittest
assert("https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".parseBaseJSKey() == "0c96dfd3");
assert("www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".parseBaseJSKey() == "0c96dfd3");
}

/**
 * Parses an HLS playlist into the formats announced by its #EXT-X-STREAM-INF tags.
 *
 * Each #EXT-X-STREAM-INF line is paired with the line that follows it, which is
 * read as the URL of that variant. Every other line is ignored.
 *
 * Params:
 *   m3u8 = text of the playlist
 *
 * Returns: one YoutubeFormat per #EXT-X-STREAM-INF tag that is followed by a line,
 *          in playlist order; empty when the playlist has none
 */
YoutubeFormat[] parseM3u8Formats(string m3u8)
{
    enum streamInfoTag = "#EXT-X-STREAM-INF:";

    YoutubeFormat[] formats;
    string[] lines = m3u8.strip().split("\n");

    foreach(index, rawLine; lines)
    {
        //stripping also drops the '\r' left behind by "\r\n" line endings
        string line = rawLine.strip();

        //a tag on the very last line has no URL line to pair with
        if(!line.startsWith(streamInfoTag) || index + 1 >= lines.length)
        {
            continue;
        }

        string streamInfo = line[streamInfoTag.length .. $];
        string url = lines[index + 1].strip();

        //[^"]+ stops at the closing quote of CODECS even when another quoted attribute follows
        string codecs = streamInfo.matchOrFail!`CODECS="([^"]+)"`;
        string resolution = streamInfo.matchOrFail!`RESOLUTION=(\d+x\d+)`;

        YoutubeFormat parsed;
        parsed.itag = url.matchOrFail!`\/itag\/(\d+)\/`.to!int;
        //the URL carries one clen value per track, the total is their sum
        parsed.length = url.matchAll(ctRegex!`clen%3D(\d+)`).map!(capture => capture[1].to!ulong).sum();
        //"256x144" becomes "144p"
        parsed.quality = resolution.split("x")[1] ~ "p";
        if(codecs.canFind("mp4a"))
        {
            parsed.audioVisual ~= AudioVisual.AUDIO;
            parsed.mimetype = "audio/mp4";
        }
        //checked second so that a stream with both tracks ends up as video/mp4
        if(codecs.canFind("avc"))
        {
            parsed.audioVisual ~= AudioVisual.VIDEO;
            parsed.mimetype = "video/mp4";
        }

        formats ~= parsed;
    }
    return formats;
}

unittest
{
    writeln("Should parse HLS m3u8 formats".formatTitle());
    scope(success) writeln("OK\n".formatSuccess());

    //the fixture is read from disk and parsed first, the reference list follows
    string playlist = "tests/index.m3u8".readText();
    YoutubeFormat[] parsed = playlist.parseM3u8Formats();

    //one entry per variant of the fixture, in playlist order
    //the second argument is written as the sum of two values, matching how the parser adds up the clen values of a URL
    YoutubeFormat[] wanted = [
        YoutubeFormat(91, 4659962 + 8272591, "144p", "video/mp4", [AudioVisual.AUDIO, AudioVisual.VIDEO]),
        YoutubeFormat(92, 4659962 + 17875980, "240p", "audio/mp4", [AudioVisual.AUDIO]),
        YoutubeFormat(93, 12365022 + 33446485, "360p", "video/mp4", [AudioVisual.VIDEO]),
        YoutubeFormat(94, 12365022 + 62573407, "480p", "video/mp4", [AudioVisual.AUDIO, AudioVisual.VIDEO]),
        YoutubeFormat(95, 12365022 + 124753691, "720p", "video/mp4", [AudioVisual.AUDIO, AudioVisual.VIDEO]),
        YoutubeFormat(96, 12365022 + 239812801, "1080p", "video/mp4", [AudioVisual.AUDIO, AudioVisual.VIDEO])
    ];

    assert(wanted == parsed);
}
Loading

0 comments on commit a878567

Please sign in to comment.