Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ISSUE-28] Cache initial HTML page retrieval #31

Merged
merged 3 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions source/app.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ import std.range : iota;
import std.logger;
import std.getopt;

import parsers;
import downloaders;
import helpers;
import parsers : YoutubeFormat, YoutubeVideoURLExtractor;
import cache : Cache;

void main(string[] args)
{
Expand Down Expand Up @@ -74,10 +75,10 @@ void main(string[] args)

void handleURL(string url, int itag, StdoutLogger logger, bool displayFormats, bool outputURL, bool parallel, bool noProgress)
{
auto cache = Cache(logger);
logger.display(formatTitle("Handling " ~ url));
string html = url.get().idup;
logger.displayVerbose("Downloaded video HTML");
YoutubeVideoURLExtractor parser = makeParser(html, logger);
YoutubeVideoURLExtractor parser = cache.makeParser(url, itag);

if(displayFormats)
{
Expand Down
167 changes: 167 additions & 0 deletions source/cache.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
import std.stdio : writeln;
import std.array : replace;
import std.base64 : Base64URL;
import std.conv : to;
import std.datetime : SysTime, Clock, days;
import std.file : exists, getcwd, readText, tempDir, write;
import std.net.curl : get;
import std.path : buildPath;
import std.string : indexOf;

import helpers : StdoutLogger, parseID, parseQueryString;
import parsers : parseBaseJSURL, YoutubeVideoURLExtractor, SimpleYoutubeVideoURLExtractor, AdvancedYoutubeVideoURLExtractor;

/// Disk-backed cache for a YouTube watch page's HTML and its player base.js,
/// so repeated runs for the same video skip the network round trip.
///
/// Entries are keyed by video ID (falling back to a Base64URL encoding of the
/// whole URL) and are considered stale once the stream URL's "expire"
/// timestamp has passed.
struct Cache
{
    private StdoutLogger logger;
    // Injectable download routine so unit tests can stub out the network.
    private string delegate(string url) downloadAsString;
    string cacheDirectory;

    this(StdoutLogger logger)
    {
        this.logger = logger;
        downloadAsString = (string url) => url.get().idup;
        cacheDirectory = tempDir();
    }

    this(StdoutLogger logger, string delegate(string url) downloadAsString)
    {
        this(logger);
        this.downloadAsString = downloadAsString;
    }

    /// Returns a parser for `url`, refreshing the cache first if it is
    /// missing or stale. The base.js payload is only read when the page
    /// uses signatureCipher (i.e. needs the advanced extractor).
    YoutubeVideoURLExtractor makeParser(string url, int itag)
    {
        string html = getHTML(url, itag);
        if(html.indexOf("signatureCipher") == -1)
        {
            return new SimpleYoutubeVideoURLExtractor(html, logger);
        }
        string baseJS = getBaseJS(url, itag);
        return new AdvancedYoutubeVideoURLExtractor(html, baseJS, logger);
    }

    /// Returns the cached (or freshly downloaded) watch-page HTML.
    private string getHTML(string url, int itag)
    {
        updateCache(url, itag);
        return htmlCachePath(url).readText();
    }

    /// Returns the cached (or freshly downloaded) player base.js.
    private string getBaseJS(string url, int itag)
    {
        updateCache(url, itag);
        return baseJSCachePath(url).readText();
    }

    // Both cache files share one key per URL; only the extension differs.
    private string htmlCachePath(string url)
    {
        return getCachePath(url) ~ ".html";
    }

    private string baseJSCachePath(string url)
    {
        return getCachePath(url) ~ ".js";
    }

    /// Re-downloads both the HTML and its base.js when the cached HTML is
    /// absent or its stream URLs have expired; otherwise leaves them as is.
    private void updateCache(string url, int itag)
    {
        string htmlPath = htmlCachePath(url);
        bool shouldRedownload = !htmlPath.exists() || isStale(htmlPath.readText(), itag);
        if(shouldRedownload)
        {
            logger.display("Cache miss, downloading HTML...");
            string html = this.downloadAsString(url);
            htmlPath.write(html);
            // base.js is versioned with the page, so refresh it alongside the HTML.
            string baseJS = this.downloadAsString(html.parseBaseJSURL());
            baseJSCachePath(url).write(baseJS);
        }
        else
        {
            logger.display("Cache hit, skipping HTML download...");
        }
    }

    /// True when the "expire" timestamp embedded in the stream URL for `itag`
    /// is in the past. A shallow parser is enough here: the advanced variant
    /// gets an empty base.js because no URL deciphering is needed to read
    /// the expiration timestamp.
    private bool isStale(string html, int itag)
    {
        YoutubeVideoURLExtractor shallowParser = html.indexOf("signatureCipher") == -1
            ? new SimpleYoutubeVideoURLExtractor(html, logger)
            : new AdvancedYoutubeVideoURLExtractor(html, "", logger);
        ulong expire = shallowParser.findExpirationTimestamp(itag);
        return SysTime.fromUnixTime(expire) < Clock.currTime();
    }

    /// Cache file path (without extension) for `url`: the parsed video ID
    /// when available, otherwise a filesystem-safe Base64URL of the URL.
    private string getCachePath(string url)
    {
        string cacheKey = url.parseID();
        if(cacheKey == "")
        {
            cacheKey = Base64URL.encode(cast(ubyte[]) url);
        }

        return buildPath(cacheDirectory, cacheKey);
    }
}

unittest
{
    writeln("Given SimpleYoutubeVideoURLExtractor, when cache is stale, should redownload HTML");
    // Stub downloader that records whether the network would have been hit.
    bool networkHit = false;
    auto fakeDownload = delegate string(string _) {
        networkHit = true;
        return "zoz.html".readText();
    };

    auto cache = Cache(new StdoutLogger(), fakeDownload);
    cache.cacheDirectory = getcwd();

    // The zoz fixture carries an expired timestamp, so a download must occur.
    cache.makeParser("https://youtu.be/zoz", 18);
    assert(networkHit);
}

unittest
{
    writeln("Given SimpleYoutubeVideoURLExtractor, when cache is fresh, should not download HTML");
    // Stub downloader that records whether the network would have been hit.
    bool networkHit = false;
    auto fakeDownload = delegate string(string _) {
        networkHit = true;
        return "zoz.html".readText();
    };

    auto cache = Cache(new StdoutLogger(), fakeDownload);
    cache.cacheDirectory = getcwd();

    // Mock a previously cached page whose stream URLs expire tomorrow.
    SysTime tomorrow = Clock.currTime() + 1.days;
    "zoz-fresh.html".write("zoz.html".readText().dup.replace("expire=1638935038", "expire=" ~ tomorrow.toUnixTime().to!string));

    cache.makeParser("https://youtu.be/zoz-fresh", 18);
    assert(!networkHit);
}

unittest
{
    writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is stale, should redownload HTML");
    // Stub downloader that records whether the network would have been hit.
    bool networkHit = false;
    auto fakeDownload = delegate string(string _) {
        networkHit = true;
        return "dQw4w9WgXcQ.html".readText();
    };

    auto cache = Cache(new StdoutLogger(), fakeDownload);
    cache.cacheDirectory = getcwd();

    // The dQw4w9WgXcQ fixture carries an expired timestamp, so a download must occur.
    cache.makeParser("https://youtu.be/dQw4w9WgXcQ", 18);
    assert(networkHit);
}

unittest
{
    writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is fresh, should not download HTML");
    // Stub downloader that records whether the network would have been hit.
    bool networkHit = false;
    auto fakeDownload = delegate string(string _) {
        networkHit = true;
        return "dQw4w9WgXcQ-fresh.html".readText();
    };

    auto cache = Cache(new StdoutLogger(), fakeDownload);
    cache.cacheDirectory = getcwd();

    // Mock previously cached and fresh files: both the base.js and an HTML
    // page whose ciphered stream URLs expire tomorrow.
    SysTime tomorrow = Clock.currTime() + 1.days;
    "dQw4w9WgXcQ-fresh.js".write("base.min.js".readText());
    "dQw4w9WgXcQ-fresh.html".write(
        "dQw4w9WgXcQ.html".readText().dup.replace("expire%3D1677997809", "expire%3D" ~ tomorrow.toUnixTime().to!string)
    );

    cache.makeParser("https://youtu.be/dQw4w9WgXcQ-fresh", 18);
    assert(!networkHit);
}
39 changes: 38 additions & 1 deletion source/helpers.d
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import std.regex : ctRegex, matchFirst, escaper, regex, Captures;
import std.algorithm : filter;
import std.conv : to;
import std.net.curl : HTTP;
import std.string : split;
import std.string : split, indexOf, startsWith;
import std.format : formattedRead;

ulong getContentLength(string url)
{
Expand Down Expand Up @@ -33,6 +34,11 @@ string sanitizePath(string path)

string[string] parseQueryString(string input)
{
auto questionMarkIndex = input.indexOf("?");
if(questionMarkIndex != -1)
{
input = input[questionMarkIndex + 1 .. $];
}
string[string] result;
foreach(params; input.split("&"))
{
Expand Down Expand Up @@ -158,3 +164,34 @@ string formatTitle(string input)
{
return "\033[1m" ~ input ~ "\033[0m";
}

/// Extracts the video ID from a YouTube URL.
///
/// Supports watch URLs (any position of the `v` parameter, not only `?v=`),
/// youtu.be short links and /shorts/ URLs, with or without a leading
/// "https://" and with or without a trailing query string (e.g. `?t=42`).
/// Returns an empty string when no ID can be recognized.
string parseID(string url)
{
    if(!url.startsWith("https://"))
    {
        url = "https://" ~ url;
    }

    // Watch URLs: take the "v" query parameter wherever it appears.
    auto queryIndex = url.indexOf("?");
    if(queryIndex != -1)
    {
        auto params = url[queryIndex + 1 .. $].parseQueryString();
        if("v" in params)
        {
            return params["v"];
        }
    }

    string id;
    if(url.indexOf("youtu.be") != -1)
    {
        url.formattedRead!"https://youtu.be/%s"(id);
    }
    else if(url.indexOf("shorts") != -1)
    {
        url.formattedRead!"https://www.youtube.com/shorts/%s"(id);
    }

    // %s is greedy: drop any query string it captured after the ID.
    auto idQueryIndex = id.indexOf("?");
    return idQueryIndex == -1 ? id : id[0 .. idQueryIndex];
}

unittest
{
    // One case per supported URL flavour, plus one unrecognizable input.
    assert(parseID("https://www.youtube.com/watch?v=-H-Fno9xbE4") == "-H-Fno9xbE4");
    assert(parseID("https://youtu.be/-H-Fno9xbE4") == "-H-Fno9xbE4");
    assert(parseID("https://www.youtube.com/shorts/_tT2ldpZHek") == "_tT2ldpZHek");
    assert(parseID("qlsdkqsldkj") == "");
}
19 changes: 17 additions & 2 deletions source/parsers.d
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ abstract class YoutubeVideoURLExtractor
protected Document parser;

abstract public string getURL(int itag = 18);
abstract public ulong findExpirationTimestamp(int itag);

public string getTitle()
{
Expand Down Expand Up @@ -89,6 +90,13 @@ class SimpleYoutubeVideoURLExtractor : YoutubeVideoURLExtractor
.matchOrFail(`"itag":` ~ itag.to!string ~ `,"url":"(.*?)"`)
.replace(`\u0026`, "&");
}

/// Returns the Unix timestamp at which the direct stream URL for `itag`
/// expires, read from the URL's "expire" query parameter.
override ulong findExpirationTimestamp(int itag)
{
string videoURL = getURL(itag);
string[string] params = videoURL.parseQueryString();
return params["expire"].to!ulong;
}
}

unittest
Expand Down Expand Up @@ -199,7 +207,14 @@ class AdvancedYoutubeVideoURLExtractor : YoutubeVideoURLExtractor
return params["url"].decodeComponent() ~ "&" ~ params["sp"] ~ "=" ~ sig;
}

private string findSignatureCipher(int itag)
/// Returns the Unix timestamp at which the stream URL for `itag` expires,
/// read from the "expire" parameter of the URL nested inside signatureCipher.
override ulong findExpirationTimestamp(int itag)
{
string signatureCipher = findSignatureCipher(itag);
string[string] params = signatureCipher.parseQueryString()["url"].decodeComponent().parseQueryString();
// to!ulong matches the return type and the simple extractor; to!int would
// throw ConvOverflowException for timestamps beyond int range (post-2038).
return params["expire"].to!ulong;
}

string findSignatureCipher(int itag)
{
string encoded = "itag%3D" ~ itag.to!string;
long index = html.indexOf(encoded);
Expand Down Expand Up @@ -282,7 +297,7 @@ YoutubeVideoURLExtractor makeParser(string html, StdoutLogger logger)
return makeParser(html, baseJSURL => baseJSURL.get().idup, logger);
}

YoutubeVideoURLExtractor makeParser(string html, string function(string) performGETRequest, StdoutLogger logger)
YoutubeVideoURLExtractor makeParser(string html, string delegate(string) performGETRequest, StdoutLogger logger)
{
if(html.canFind("signatureCipher"))
{
Expand Down
Loading