Skip to content

Commit

Permalink
[feature/ISSUE-28] Cache base.js based on base.js URL and not video ID
Browse files Browse the repository at this point in the history
  • Loading branch information
azihassan committed Dec 29, 2023
1 parent 8f9ce43 commit 2cb86b7
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 19 deletions.
135 changes: 116 additions & 19 deletions source/cache.d
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import std.array : replace;
import std.base64 : Base64URL;
import std.conv : to;
import std.datetime : SysTime, Clock, days;
import std.file : exists, getcwd, readText, tempDir, write;
import std.file : exists, getcwd, readText, remove, tempDir, write;
import std.net.curl : get;
import std.path : buildPath;
import std.typecons : Flag, Yes, No;
import std.string : indexOf;

import helpers : StdoutLogger, parseID, parseQueryString;
import helpers : StdoutLogger, parseID, parseQueryString, parseBaseJSKey;
import parsers : parseBaseJSURL, YoutubeVideoURLExtractor, SimpleYoutubeVideoURLExtractor, AdvancedYoutubeVideoURLExtractor;

struct Cache
Expand All @@ -31,50 +31,67 @@ struct Cache
{
this(logger);
this.downloadAsString = downloadAsString;
this.forceRefresh = forceRefresh;
}

/**
 * Builds the URL extractor for a video from the cached (or freshly downloaded) watch page and base.js.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so what look like superseded
 * statements (the getCachePath/updateCache calls, the "signatureCipher:" check) sit next to their
 * replacements. Which of each pair survives is inferred, not shown here - confirm against the repository.
 *
 * Params:
 *     url = video page URL; used to derive the HTML cache path and handed to the HTML cache update
 *     itag = forwarded unchanged to the cache update helpers
 * Returns: a SimpleYoutubeVideoURLExtractor when the HTML does not contain "signatureCipher",
 *          otherwise an AdvancedYoutubeVideoURLExtractor
 */
YoutubeVideoURLExtractor makeParser(string url, int itag)
{
string htmlCachePath = getCachePath(url) ~ ".html";
string baseJSCachePath = getCachePath(url) ~ ".js";
updateCache(url, htmlCachePath, baseJSCachePath, itag);

// The HTML cache file is keyed by the video URL and is read back from disk after the update.
string htmlCachePath = getHTMLCachePath(url) ~ ".html";
updateHTMLCache(url, htmlCachePath, itag);
string html = htmlCachePath.readText();

// The base.js cache file is keyed by the base.js URL, which is itself extracted from the HTML.
string baseJSURL = html.parseBaseJSURL();
string baseJSCachePath = getBaseJSCachePath(baseJSURL) ~ ".js";
updateBaseJSCache(baseJSURL, baseJSCachePath, itag);
string baseJS = baseJSCachePath.readText();
if(html.indexOf("signatureCipher:") == -1)

// No "signatureCipher" marker in the page selects the simple extractor.
if(html.indexOf("signatureCipher") == -1)
{
return new SimpleYoutubeVideoURLExtractor(html, baseJS, logger);
}
return new AdvancedYoutubeVideoURLExtractor(html, baseJS, logger);
}

/**
 * Refreshes the cached HTML at `htmlCachePath` when a refresh is forced, the file does not exist,
 * or isStale reports the cached copy as expired for `itag`; otherwise only logs a cache hit.
 *
 * NOTE(review): rendered diff without +/- markers - the updateCache signature and the two baseJS
 * statements in the download branch look like the superseded version of this method; confirm
 * against the repository.
 *
 * Params:
 *     url = page URL passed to downloadAsString on a cache miss
 *     htmlCachePath = file the HTML is checked at and written to
 *     itag = forwarded to isStale
 */
private void updateCache(string url, string htmlCachePath, string baseJSCachePath, int itag)
private void updateHTMLCache(string url, string htmlCachePath, int itag)
{
// Short-circuit order matters: the cached file is only read by isStale once it is known to exist.
bool shouldRedownload = forceRefresh || !htmlCachePath.exists() || isStale(htmlCachePath.readText(), itag);
if(shouldRedownload)
{
logger.display("Cache miss, downloading HTML...");
string html = this.downloadAsString(url);
htmlCachePath.write(html);
string baseJS = this.downloadAsString(html.parseBaseJSURL());
baseJSCachePath.write(baseJS);
}
else
{
logger.display("Cache hit, skipping HTML download...");
}
}

/**
 * Makes sure a copy of base.js exists at `baseJSCachePath`.
 *
 * The file is downloaded from `url` and written to disk when a refresh is forced or when
 * no file exists at the cache path; otherwise the existing file is left untouched.
 *
 * Params:
 *     url = address base.js is downloaded from on a cache miss
 *     baseJSCachePath = file the downloaded script is stored in
 *     itag = not used by this method
 */
private void updateBaseJSCache(string url, string baseJSCachePath, int itag)
{
    // Negation of "forced or missing": the cached copy is usable as it stands.
    immutable bool alreadyCached = !forceRefresh && baseJSCachePath.exists();
    if(alreadyCached)
    {
        logger.display("base.js cache hit, skipping download...");
        return;
    }

    logger.display("base.js cache miss, downloading from " ~ url);
    string downloaded = this.downloadAsString(url);
    baseJSCachePath.write(downloaded);
}

/**
 * Reports whether the expiration timestamp found in `html` for `itag` lies before the current time.
 *
 * A throwaway extractor is built with an empty base.js string, only to read that timestamp.
 *
 * NOTE(review): rendered diff without +/- markers - the two shallowParser declaration lines appear
 * to be the old ("signatureCipher:") and new ("signatureCipher") variants of one statement; confirm.
 *
 * Params:
 *     html = cached page contents to inspect
 *     itag = passed to findExpirationTimestamp
 * Returns: true when the timestamp, read as Unix time, is earlier than Clock.currTime()
 */
private bool isStale(string html, int itag)
{
YoutubeVideoURLExtractor shallowParser = html.indexOf("signatureCipher:") == -1
YoutubeVideoURLExtractor shallowParser = html.indexOf("signatureCipher") == -1
? new SimpleYoutubeVideoURLExtractor(html, "", logger)
: new AdvancedYoutubeVideoURLExtractor(html, "", logger);
ulong expire = shallowParser.findExpirationTimestamp(itag);
return SysTime.fromUnixTime(expire) < Clock.currTime();
}

private string getCachePath(string url)
private string getHTMLCachePath(string url)
{
string cacheKey = url.parseID();
if(cacheKey == "")
Expand All @@ -84,14 +101,28 @@ struct Cache

return buildPath(cacheDirectory, cacheKey);
}

/**
 * Maps a base.js URL to its extension-less location inside the cache directory.
 *
 * The file name is the key returned by parseBaseJSKey; when that key is empty, the
 * URL-safe Base64 encoding of the whole URL is used instead.
 *
 * Params:
 *     url = base.js URL to derive the cache file name from
 * Returns: cacheDirectory joined with the chosen file name
 */
private string getBaseJSCachePath(string url)
{
    string fileStem = url.parseBaseJSKey();
    if(fileStem.length == 0)
    {
        // No key could be parsed: fall back to a name derived from the full URL.
        fileStem = Base64URL.encode(cast(ubyte[]) url);
    }

    return cacheDirectory.buildPath(fileStem);
}
}

unittest
{
writeln("Given SimpleYoutubeVideoURLExtractor, when cache is stale, should redownload HTML");
bool downloadAttempted;
auto downloadAsString = delegate string(string url) {
downloadAttempted = true;
if(url == "https://youtu.be/zoz")
{
downloadAttempted = true;
}
return "zoz.html".readText();
};
auto cache = Cache(new StdoutLogger(), downloadAsString);
Expand All @@ -106,15 +137,17 @@ unittest
writeln("Given SimpleYoutubeVideoURLExtractor, when cache is fresh, should not download HTML");
bool downloadAttempted;
auto downloadAsString = delegate string(string url) {
downloadAttempted = true;
if(url == "https://youtu.be/zoz-fresh")
{
downloadAttempted = true;
}
return "zoz.html".readText();
};
SysTime tomorrow = Clock.currTime() + 1.days;
auto cache = Cache(new StdoutLogger(), downloadAsString);
cache.cacheDirectory = getcwd();

"zoz-fresh.html".write("zoz.html".readText().dup.replace("expire=1638935038", "expire=" ~ tomorrow.toUnixTime().to!string));
"zoz-fresh.js".write("base.min.js".readText());

auto parser = cache.makeParser("https://youtu.be/zoz-fresh", 18);
assert(!downloadAttempted);
Expand All @@ -125,7 +158,10 @@ unittest
writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is stale, should redownload HTML");
bool downloadAttempted;
auto downloadAsString = delegate string(string url) {
downloadAttempted = true;
if(url == "https://youtu.be/dQw4w9WgXcQ")
{
downloadAttempted = true;
}
return "dQw4w9WgXcQ.html".readText();
};
auto cache = Cache(new StdoutLogger(), downloadAsString);
Expand All @@ -140,15 +176,17 @@ unittest
writeln("Given AdvancedYoutubeVideoURLExtractor, when cache is fresh, should not download HTML");
bool downloadAttempted;
auto downloadAsString = delegate string(string url) {
downloadAttempted = true;
if(url == "https://youtu.be/dQw4w9WgXcQ-fresh")
{
downloadAttempted = true;
}
return "dQw4w9WgXcQ-fresh.html".readText();
};
SysTime tomorrow = Clock.currTime() + 1.days;
auto cache = Cache(new StdoutLogger(), downloadAsString);
cache.cacheDirectory = getcwd();

//mock previously cached and fresh files
"dQw4w9WgXcQ-fresh.js".write("base.min.js".readText());
"dQw4w9WgXcQ-fresh.html".write(
"dQw4w9WgXcQ.html".readText().dup.replace("expire%3D1677997809", "expire%3D" ~ tomorrow.toUnixTime().to!string)
);
Expand All @@ -162,13 +200,72 @@ unittest
{
writeln("When forcing refresh, should download HTML");
bool downloadAttempted;
bool baseJSDownloadAttempted;
auto downloadAsString = delegate string(string url) {
downloadAttempted = true;
writeln("downloadAsString : ", url);
if(url == "https://youtu.be/zoz")
{
downloadAttempted = true;
}
if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js")
{
baseJSDownloadAttempted = true;
}
return "zoz.html".readText();
};
auto cache = Cache(new StdoutLogger(), downloadAsString, Yes.forceRefresh);
cache.cacheDirectory = getcwd();

auto parser = cache.makeParser("https://youtu.be/zoz", 18);
assert(downloadAttempted);
assert(baseJSDownloadAttempted);
}

// A base.js file already present under the player key must be served from disk without a download.
unittest
{
    writeln("When base.js is cached, should read from cache");
    // Seed the cache: the file name matches the player key in the base.js URL checked below.
    "0c96dfd3.js".write("base.min.js".readText());
    // Remove the seeded fixture afterwards so it does not leak into later tests as a warm cache.
    scope(exit)
    {
        if("0c96dfd3.js".exists())
        {
            "0c96dfd3.js".remove();
        }
    }

    bool baseJSDownloadAttempted;
    auto downloadAsString = delegate string(string url) {
        if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js")
        {
            // Only a request for base.js itself counts as a cache miss for this test.
            baseJSDownloadAttempted = true;
            return "0c96dfd3.js".readText();
        }
        return "zoz.html".readText();
    };
    auto cache = Cache(new StdoutLogger(), downloadAsString);
    cache.cacheDirectory = getcwd();

    auto parser = cache.makeParser("https://youtu.be/zoz", 18);
    assert(!baseJSDownloadAttempted);
}

// With no base.js file under the player key, makeParser must download it.
unittest
{
    writeln("When base.js is not cached, should download it");

    // Deletes the cached base.js if present. The existence check keeps cleanup from throwing
    // (and hiding the real failure) when makeParser fails before the file was written.
    void removeCachedBaseJS()
    {
        if("0c96dfd3.js".exists())
        {
            "0c96dfd3.js".remove();
        }
    }

    // Start from a cold cache and leave none behind.
    removeCachedBaseJS();
    scope(exit)
    {
        removeCachedBaseJS();
    }

    bool baseJSDownloadAttempted;
    auto downloadAsString = delegate string(string url) {
        if(url == "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js")
        {
            // Only a request for base.js itself counts as the expected download.
            baseJSDownloadAttempted = true;
            return "base.min.js".readText();
        }
        return "zoz.html".readText();
    };
    auto cache = Cache(new StdoutLogger(), downloadAsString);
    cache.cacheDirectory = getcwd();

    auto parser = cache.makeParser("https://youtu.be/zoz", 18);
    assert(baseJSDownloadAttempted);
}
21 changes: 21 additions & 0 deletions source/helpers.d
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,24 @@ unittest
assert("https://www.youtube.com/shorts/_tT2ldpZHek".parseID() == "_tT2ldpZHek");
assert("qlsdkqsldkj".parseID() == "");
}

/**
 * Extracts the cache key from a base.js URL.
 *
 * A leading "https://" and a leading "www.youtube.com" are stripped when present; the key is
 * the fourth "/"-separated segment of what remains, e.g. "0c96dfd3" for
 * "/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js".
 *
 * Params:
 *     url = absolute or host-relative base.js URL
 * Returns: the key segment, or "" when the URL has fewer than four segments, so that callers
 *          comparing against "" can fall back to another key instead of hitting a range violation
 */
string parseBaseJSKey(string url)
{
    if(url.startsWith("https://"))
    {
        url = url["https://".length .. $];
    }
    if(url.startsWith("www.youtube.com"))
    {
        url = url["www.youtube.com".length .. $];
    }

    // A host-relative path starts with "/", so segment 0 is empty and the key sits at index 3.
    auto segments = url.split("/");
    return segments.length > 3 ? segments[3] : "";
}

// The same player script addressed by path only, by full URL, and by host plus path
// must all resolve to one key.
unittest
{
    foreach(candidate; [
        "/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js",
        "https://www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js",
        "www.youtube.com/s/player/0c96dfd3/player_ias.vflset/ar_EG/base.js",
    ])
    {
        assert(candidate.parseBaseJSKey() == "0c96dfd3");
    }
}

0 comments on commit 2cb86b7

Please sign in to comment.