From 4c3ff471e222426652c19d2a96fb3632bf5f0314 Mon Sep 17 00:00:00 2001 From: Azi Hassan Date: Tue, 2 Apr 2024 02:14:34 +0000 Subject: [PATCH] [feature/ISSUE-66] Add proxy support --- .github/workflows/test.yml | 12 ++++++++++-- script.py | 5 +++++ source/app.d | 12 +++++++++--- source/cache.d | 9 ++++++++- source/downloaders.d | 30 ++++++++++++++++++++++++++---- source/helpers.d | 4 +++- source/parsers.d | 6 ------ tests/tests-macos.sh | 34 ++++++++++++++++++++++++++++++---- tests/tests.sh | 34 ++++++++++++++++++++++++++++++---- 9 files changed, 121 insertions(+), 25 deletions(-) create mode 100644 script.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4be05ee..9de9bc1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ jobs: unit_test: strategy: matrix: - os: [ { name: ubuntu-20.04, extension: '' } , { name: windows-2019, extension: '.exe' }, { name: macos-latest, extension: '' } ] + os: [ { name: macos-latest, extension: '' } ] runs-on: ${{ matrix.os.name }} @@ -31,7 +31,7 @@ jobs: console_test: strategy: matrix: - os: [ { name: ubuntu-20.04, command: sh tests/tests.sh, extension: '' } , { name: windows-2019, command: powershell.exe -file tests\tests.ps1, extension: '.exe' }, { name: macos-latest, command: sh tests/tests-macos.sh, extension: '' }] + os: [{ name: macos-latest, command: sh tests/tests-macos.sh, extension: '' }] runs-on: ${{ matrix.os.name }} needs: unit_test @@ -52,6 +52,14 @@ jobs: tar -xf libcurl-7.68.0-WinSSL-zlib-x86-x64.zip xcopy dmd2\windows\bin64\libcurl.dll . + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install mitmproxy + run: pip install mitmproxy + - name: e2e tests run: ${{ matrix.os.command }} diff --git a/script.py b/script.py new file mode 100644 index 0000000..acc1a9f --- /dev/null +++ b/script.py @@ -0,0 +1,5 @@ +from mitmproxy import ctx + +def request(flow): + print(flow.request.url) + ctx.master.shutdown() diff --git a/source/app.d b/source/app.d index ced2947..525a02a 100644 --- a/source/app.d +++ b/source/app.d @@ -38,6 +38,7 @@ void main(string[] args) bool noProgress; bool noCache; bool dethrottle = true; + string proxy; version(linux) { @@ -56,6 +57,7 @@ void main(string[] args) "no-cache", "Skip caching of HTML and base.js", &noCache, "d|dethrottle", "Attempt to dethrottle download speed by solving the N challenge (defaults to true)", &dethrottle, "no-dethrottle", "Skip N-challenge dethrottling attempt", () { dethrottle = false; }, + "proxy", "Specifies a proxy in the type://host:port format", &proxy ); if(help.helpWanted || args.length == 1) @@ -85,7 +87,8 @@ void main(string[] args) parallel, noProgress, retry > 0 ? true : noCache, //force cache refresh on failure, - dethrottle + dethrottle, + proxy ); break; } @@ -105,10 +108,12 @@ void main(string[] args) } } -void handleURL(string url, int itag, StdoutLogger logger, bool displayFormats, bool outputURL, bool parallel, bool noProgress, bool noCache, bool dethrottle) +void handleURL(string url, int itag, StdoutLogger logger, bool displayFormats, bool outputURL, bool parallel, bool noProgress, bool noCache, bool dethrottle, string proxy) { logger.display(formatTitle("Handling " ~ url)); - YoutubeVideoURLExtractor parser = Cache(logger, noCache ? Yes.forceRefresh : No.forceRefresh).makeParser(url, itag); + Cache cache = Cache(logger, noCache ? Yes.forceRefresh : No.forceRefresh); + cache.proxy = proxy; + YoutubeVideoURLExtractor parser = cache.makeParser(url, itag); logger.displayVerbose("Downloaded video HTML"); logger.displayVerbose("Attempt to dethrottle : " ~ (dethrottle ? "Yes" : "No")); @@ -171,5 +176,6 @@ void handleURL(string url, int itag, StdoutLogger logger, bool displayFormats, b return 0; }, !noProgress); } + downloader.setProxy(proxy); downloader.download(destination, link, url); } diff --git a/source/cache.d b/source/cache.d index de4b30a..26c831e 100644 --- a/source/cache.d +++ b/source/cache.d @@ -11,7 +11,7 @@ import std.string : indexOf; import std.regex : ctRegex, matchFirst; import std.algorithm : map; -import helpers : StdoutLogger, parseID, parseQueryString, parseBaseJSKey; +import helpers : StdoutLogger, parseID, parseQueryString, parseBaseJSKey, USER_AGENT; import parsers : parseBaseJSURL, YoutubeVideoURLExtractor, SimpleYoutubeVideoURLExtractor, AdvancedYoutubeVideoURLExtractor; struct Cache @@ -20,6 +20,7 @@ struct Cache private string delegate(string url) downloadAsString; private Flag!"forceRefresh" forceRefresh; string cacheDirectory; + string proxy; this(StdoutLogger logger, Flag!"forceRefresh" forceRefresh = No.forceRefresh) { @@ -32,8 +33,14 @@ struct Cache Curl curl; curl.initialize(); curl.set(CurlOption.url, url); + curl.set(CurlOption.useragent, USER_AGENT); curl.set(CurlOption.encoding, "deflate, gzip"); curl.set(CurlOption.followlocation, true); + curl.set(CurlOption.followlocation, true); + if(proxy) + { + curl.set(CurlOption.proxy, proxy); + } curl.onReceive = (ubyte[] chunk) { result ~= chunk.map!(to!(const(char))).to!string; diff --git a/source/downloaders.d b/source/downloaders.d index 16fd1fd..ea38f97 100644 --- a/source/downloaders.d +++ b/source/downloaders.d @@ -6,13 +6,14 @@ import std.string : startsWith, indexOf, format, split; import std.file : append, exists, read, remove, getSize; import std.range : iota; import std.net.curl : Curl, CurlOption; -import helpers : getContentLength, sanitizePath, StdoutLogger, formatSuccess; +import helpers : getContentLength, sanitizePath, StdoutLogger, formatSuccess, USER_AGENT; import parsers : YoutubeFormat; interface Downloader { void download(string destination, string url, string referer); + Downloader setProxy(string proxy); } class RegularDownloader : Downloader @@ -20,6 +21,7 @@ class RegularDownloader : Downloader private StdoutLogger logger; private int delegate(ulong length, ulong currentLength) onProgress; private bool progress; + private string proxy; this(StdoutLogger logger, int delegate(ulong length, ulong currentLength) onProgress, bool progress = true) { @@ -28,6 +30,12 @@ class RegularDownloader : Downloader this.progress = progress; } + public Downloader setProxy(string proxy) + { + this.proxy = proxy; + return this; + } + public void download(string destination, string url, string referer) { auto http = Curl(); @@ -49,13 +57,17 @@ class RegularDownloader : Downloader auto file = File(destination, "ab"); http.set(CurlOption.url, url); - http.set(CurlOption.useragent, "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"); + http.set(CurlOption.useragent, USER_AGENT); http.set(CurlOption.referer, referer); http.set(CurlOption.followlocation, true); http.set(CurlOption.failonerror, true); http.set(CurlOption.timeout, 60 * 3); http.set(CurlOption.connecttimeout, 60 * 3); http.set(CurlOption.nosignal, true); + if(proxy != "") + { + http.set(CurlOption.proxy, proxy); + } http.onReceiveHeader = (in char[] header) { logger.displayVerbose(header); @@ -83,6 +95,7 @@ class ParallelDownloader : Downloader private string title; private YoutubeFormat youtubeFormat; private bool progress; + private string proxy; this(StdoutLogger logger, string id, string title, YoutubeFormat youtubeFormat, bool progress = true) { @@ -93,6 +106,13 @@ class ParallelDownloader : Downloader this.progress = progress; } + public Downloader setProxy(string proxy) + { + this.proxy = proxy; + return this; + } + + public void download(string destination, string url, string referer) { ulong length = url.getContentLength(); @@ -119,7 +139,7 @@ class ParallelDownloader : Downloader logger.displayVerbose(partialDestination, " already has ", partialDestination.getSize(), " bytes, skipping"); continue; } - new RegularDownloader(logger, (ulong _, ulong __) { + auto downloader = new RegularDownloader(logger, (ulong _, ulong __) { if(length == 0) { return 0; @@ -128,7 +148,9 @@ class ParallelDownloader : Downloader auto percentage = 100.0 * (cast(float)(current) / length); writef!"\r[%.2f %%] %.2f / %.2f MB"(percentage, current / 1024.0 / 1024.0, length / 1024.0 / 1024.0); return 0; - }, progress).download(partialDestination, partialLink, url); + }, progress); + downloader.proxy = proxy; + downloader.download(partialDestination, partialLink, url); } writeln(); diff --git a/source/helpers.d b/source/helpers.d index 3b482b0..b1c959f 100644 --- a/source/helpers.d +++ b/source/helpers.d @@ -7,11 +7,13 @@ import std.net.curl : HTTP; import std.string : split, indexOf, startsWith; import std.format : formattedRead; +immutable string USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"; + ulong getContentLength(string url) { auto http = HTTP(url); http.method = HTTP.Method.head; - http.addRequestHeader("User-Agent", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0"); + http.addRequestHeader("User-Agent", USER_AGENT); http.perform(); if(http.statusLine.code >= 400) { diff --git a/source/parsers.d b/source/parsers.d index 5bee42f..7f241d2 100644 --- a/source/parsers.d +++ b/source/parsers.d @@ -1,5 +1,4 @@ import std.json; -import std.net.curl : get; import std.uri : decodeComponent, encodeComponent; import std.stdio; import std.typecons : tuple, Tuple; @@ -356,11 +355,6 @@ unittest assert(extractor.getURL(396) == "https://rr2---sn-f5o5-jhod.googlevideo.com/videoplayback?expire=1677997809&ei=keIDZIHQKMWC1ga62YWIDQ&ip=105.66.0.249&id=o-ADmt4SY6m6445pG7f4G5f72y1NE48ZiWiqWDA9pi6iQo&itag=396&aitags=133%2C134%2C135%2C136%2C137%2C160%2C242%2C243%2C244%2C247%2C248%2C278%2C394%2C395%2C396%2C397%2C398%2C399&source=youtube&requiressl=yes&mh=7c&mm=31%2C29&mn=sn-f5o5-jhod%2Csn-h5q7knes&ms=au%2Crdu&mv=m&mvi=2&pl=24&initcwndbps=275000&vprv=1&mime=video%2Fmp4&ns=V1YGXTHGUU0a4PsRJqmYKX0L&gir=yes&clen=5953258&dur=212.040&lmt=1674230525337110&mt=1677975897&fvip=4&keepalive=yes&fexp=24007246&c=WEB&txp=4537434&n=iRrA3X-4scFA5la&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIgE-grPIIwKVqUa_siK-FtbLtMME0LPjp9rNlzuvLN7XQCIQCfVt03aw8T9cNgG3u_pFuQafSG4AQeKpgLEHcvodbUjA%3D%3D&sig=AOq0QJ8wRQIgadIMr0vpR2qXdJuUXwsemVtnHk62MbU6kF5SrAfOGlwCIQDYj3buw7XBrdJDtAAUL42iVe5Bfi8PRLVUK3aq-Zc2iA%3D%3D"); } -YoutubeVideoURLExtractor makeParser(string html, StdoutLogger logger) -{ - return makeParser(html, baseJSURL => baseJSURL.get().idup, logger); -} - YoutubeVideoURLExtractor makeParser(string html, string delegate(string) performGETRequest, StdoutLogger logger) { if(html.canFind("signatureCipher")) diff --git a/tests/tests-macos.sh b/tests/tests-macos.sh index 6e82e99..7704d0e 100644 --- a/tests/tests-macos.sh +++ b/tests/tests-macos.sh @@ -1,15 +1,25 @@ set -e chmod +x youtube-d +mitmdump -q -w proxydump & 1>/dev/null 2>/dev/null +trap "pkill -INT mitmdump" EXIT +echo Launching proxy... +sleep 2 +echo Proxy running in pid $(pgrep mitmdump) -./youtube-d -p --no-progress https://www.youtube.com/watch?v=R85MK830mMo +echo Installing certificate... +curl -Lo mitmproxy-ca-cert.pem --proxy http://localhost:8080 http://mitm.it/cert/pem +sudo mv mitmproxy-ca-cert.pem /usr/local/share/ca-certificates/mitmproxy.crt +zsh:1: command not found: :w + +./youtube-d -p --no-progress --proxy http://localhost:8080 https://www.youtube.com/watch?v=R85MK830mMo filename="Debugging Github actions-R85MK830mMo-18.mp4" if [ ! -e "$filename" ]; then echo "$filename not found" exit 1 else - echo "[1/3] OK, $filename exists" + echo "[1/4] OK, $filename exists" fi expected_size=7079820 @@ -18,7 +28,7 @@ if [ $expected_size -ne $actual_size ]; then echo "Wrong size. Expected $expected_size, found $actual_size" exit 1 else - echo "[2/3] OK, size is correct" + echo "[2/4] OK, size is correct" fi expected_hash="e7160d310e79a5a65f382b8ca0b198dd" @@ -27,5 +37,21 @@ if [ $expected_hash != $actual_hash ]; then echo "Wrong hash. Expected $expected_hash, found $actual_hash" exit 1 else - echo "[3/3] OK, md5sum is correct" + echo "[3/4] OK, md5sum is correct" fi + +urls=$(mitmdump -nr proxydump -s script.py) + +#if [[ $urls =~ "https://www.youtube.com/watch?v=R85MK830mMo" -a $urls =~ "base.js$" -a $urls =~ "googlevideo.com" ]]; then +for url in 'https://www.youtube.com/watch?v=R85MK830mMo' 'base.js' 'googlevideo.com'; +do + if echo $urls | grep -q $url + then + echo "\t[OK] $url" + else + echo "[4/4] Missing URL in proxy dump:" + echo $url + exit 1 + fi +done +echo "[4/4] OK, proxying worked as expected" diff --git a/tests/tests.sh b/tests/tests.sh index 4d11a7f..9553b88 100644 --- a/tests/tests.sh +++ b/tests/tests.sh @@ -1,14 +1,24 @@ set -e chmod +x youtube-d +mitmdump -q -w proxydump & 1>/dev/null 2>/dev/null +trap "pkill -INT mitmdump" EXIT +echo Launching proxy... +sleep 2 +echo Proxy running in pid $(pgrep mitmdump) -./youtube-d -p --no-progress https://www.youtube.com/watch?v=R85MK830mMo +echo Installing certificate... +curl -Lo mitmproxy-ca-cert.pem --proxy http://localhost:8080 http://mitm.it/cert/pem +sudo mv mitmproxy-ca-cert.pem /usr/local/share/ca-certificates/mitmproxy.crt +sudo update-ca-certificates + +./youtube-d -p --no-progress --proxy http://localhost:8080 https://www.youtube.com/watch?v=R85MK830mMo filename="Debugging Github actions-R85MK830mMo-18.mp4" if [ ! -e "$filename" ]; then echo "$filename not found" exit 1 else - echo "[1/3] OK, $filename exists" + echo "[1/4] OK, $filename exists" fi expected_size=7079820 @@ -17,7 +27,7 @@ if [ $expected_size -ne $actual_size ]; then echo "Wrong size. Expected $expected_size, found $actual_size" exit 1 else - echo "[2/3] OK, size is correct" + echo "[2/4] OK, size is correct" fi expected_hash="e7160d310e79a5a65f382b8ca0b198dd" @@ -26,5 +36,21 @@ if [ $expected_hash != $actual_hash ]; then echo "Wrong hash. Expected $expected_hash, found $actual_hash" exit 1 else - echo "[3/3] OK, md5sum is correct" + echo "[3/4] OK, md5sum is correct" fi + +urls=$(mitmdump -nr proxydump -s script.py) + +#if [[ $urls =~ "https://www.youtube.com/watch?v=R85MK830mMo" -a $urls =~ "base.js$" -a $urls =~ "googlevideo.com" ]]; then +for url in 'https://www.youtube.com/watch?v=R85MK830mMo' 'base.js' 'googlevideo.com'; +do + if echo $urls | grep -q $url + then + echo "\t[OK] $url" + else + echo "[4/4] Missing URL in proxy dump:" + echo $url + exit 1 + fi +done +echo "[4/4] OK, proxying worked as expected"