-
Notifications
You must be signed in to change notification settings - Fork 315
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1d8a58e
commit 3e72c5d
Showing
2 changed files
with
393 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"name": "athlon1600/youtube-downloader", | ||
"description": "PHP powered alternative for youtube-dl", | ||
"keywords": ["youtube downloader", "download youtube", "download youtube videos"], | ||
"license": "MIT", | ||
"require": { | ||
"ext-curl": "*" | ||
}, | ||
"autoload": { | ||
"files": [ | ||
"src/YouTubeDownloader.php" | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,379 @@ | ||
<?php | ||
|
||
// utils.php | ||
function sig_js_decode($player_html){ | ||
|
||
// what javascript function is responsible for signature decryption? | ||
// var l=f.sig||Xn(f.s) | ||
// a.set("signature",Xn(c));return a | ||
if(preg_match('/signature",([a-zA-Z0-9]+)\(/', $player_html, $matches)){ | ||
|
||
$func_name = $matches[1]; | ||
|
||
// extract code block from that function | ||
// xm=function(a){a=a.split("");wm.zO(a,47);wm.vY(a,1);wm.z9(a,68);wm.zO(a,21);wm.z9(a,34);wm.zO(a,16);wm.z9(a,41);return a.join("")}; | ||
if(preg_match("/{$func_name}=function\([a-z]+\){(.*?)}/", $player_html, $matches)){ | ||
|
||
$js_code = $matches[1]; | ||
|
||
// extract all relevant statements within that block | ||
// wm.vY(a,1); | ||
if(preg_match_all('/([a-z0-9]{2})\.([a-z0-9]{2})\([^,]+,(\d+)\)/i', $js_code, $matches) != false){ | ||
|
||
// must be identical | ||
$obj_list = $matches[1]; | ||
|
||
// | ||
$func_list = $matches[2]; | ||
|
||
// extract javascript code for each one of those statement functions | ||
preg_match_all('/('.implode('|', $func_list).'):function(.*?)\}/m', $player_html, $matches2, PREG_SET_ORDER); | ||
|
||
$functions = array(); | ||
|
||
// translate each function according to its use | ||
foreach($matches2 as $m){ | ||
|
||
if(strpos($m[2], 'splice') !== false){ | ||
$functions[$m[1]] = 'splice'; | ||
} else if(strpos($m[2], 'a.length') !== false){ | ||
$functions[$m[1]] = 'swap'; | ||
} else if(strpos($m[2], 'reverse') !== false){ | ||
$functions[$m[1]] = 'reverse'; | ||
} | ||
} | ||
|
||
// FINAL STEP! convert it all to instructions set | ||
$instructions = array(); | ||
|
||
foreach($matches[2] as $index => $name){ | ||
$instructions[] = array($functions[$name], $matches[3][$index]); | ||
} | ||
|
||
return $instructions; | ||
} | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
|
||
|
||
|
||
// YouTube is capitalized twice because that's how youtube itself does it: | ||
// https://developers.google.com/youtube/v3/code_samples/php | ||
class YouTubeDownloader { | ||
|
||
private $storage_dir; | ||
private $cookie_dir; | ||
|
||
private $itag_info = array( | ||
|
||
18 => "MP4 360p", | ||
22 => "MP4 720p (HD)", | ||
37 => "MP4 1080", | ||
38 => "MP4 3072p", | ||
|
||
// questionable MP4s | ||
59 => "MP4 480p", | ||
78 => "MP4 480p", | ||
|
||
43 => "WebM 360p", | ||
|
||
17 => "3GP 144p" | ||
); | ||
|
||
function __construct(){ | ||
$this->storage_dir = sys_get_temp_dir(); | ||
$this->cookie_dir = sys_get_temp_dir(); | ||
} | ||
|
||
function setStorageDir($dir){ | ||
$this->storage_dir = $dir; | ||
} | ||
|
||
// what identifies each request? user agent, cookies... | ||
public function curl($url){ | ||
|
||
$ch = curl_init($url); | ||
|
||
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'); | ||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | ||
curl_setopt($ch, CURLOPT_HEADER, 0); | ||
|
||
//curl_setopt($ch, CURLOPT_COOKIEJAR, $tmpfname); | ||
//curl_setopt($ch, CURLOPT_COOKIEFILE, $tmpfname); | ||
|
||
//curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); | ||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0); | ||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); | ||
|
||
$result = curl_exec($ch); | ||
curl_close($ch); | ||
|
||
return $result; | ||
} | ||
|
||
public static function head($url){ | ||
|
||
$ch = curl_init($url); | ||
|
||
curl_setopt($ch, CURLOPT_HEADER, 1); | ||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | ||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0); | ||
curl_setopt($ch, CURLOPT_NOBODY, 1); | ||
$result = curl_exec($ch); | ||
curl_close($ch); | ||
|
||
return http_parse_headers($result); | ||
} | ||
|
||
// html code of watch?v=aaa | ||
private function getInstructions($html){ | ||
|
||
// check what player version that video is using | ||
if(preg_match('@<script\s*src="([^"]+player-en_US[^"]+)@', $html, $matches)){ | ||
|
||
$player_url = str_replace('//', 'http://', $matches[1]); | ||
|
||
// try to find instructions list already cached from previous requests... | ||
$file_path = $this->storage_dir.'/'.md5($player_url); | ||
|
||
if(file_exists($file_path)){ | ||
|
||
// unserialize could fail on empty file | ||
$str = file_get_contents($file_path); | ||
return unserialize($str);; | ||
|
||
} else { | ||
|
||
$js_code = $this->curl($player_url); | ||
$instructions = sig_js_decode($js_code); | ||
|
||
if($instructions){ | ||
file_put_contents($file_path, serialize($instructions)); | ||
return $instructions; | ||
} | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
// this is in beta mode!! | ||
public function stream($id){ | ||
|
||
$links = $this->getDownloadLinks($id, "mp4"); | ||
|
||
if(count($links) == 0){ | ||
die("no url found!"); | ||
} | ||
|
||
$url = $links[0]['url']; | ||
|
||
$ch = curl_init(); | ||
|
||
$headers = array(); | ||
|
||
if(isset($_SERVER['HTTP_RANGE'])){ | ||
$headers[] = 'Range: '.$_SERVER['HTTP_RANGE']; | ||
} | ||
|
||
$headers[] = 'User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'; | ||
|
||
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); | ||
|
||
|
||
//curl_setopt($ch, CURLOPT_BUFFERSIZE, 4096); | ||
curl_setopt($ch, CURLOPT_URL, $url); | ||
|
||
//curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); | ||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0); | ||
|
||
// we deal with this ourselves | ||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0); | ||
curl_setopt($ch, CURLOPT_HEADER, 0); | ||
|
||
// whether request to video success | ||
$headers = ''; | ||
$headers_sent = false; | ||
$success = false; | ||
|
||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, function($ch, $data) use (&$headers, &$headers_sent){ | ||
|
||
$headers .= $data; | ||
|
||
// this should be first line | ||
if(preg_match('@HTTP\/\d\.\d\s(\d+)@', $data, $matches)){ | ||
$status_code = $matches[1]; | ||
|
||
if($status_code == 200 || $status_code == 206){ | ||
$headers_sent = true; | ||
header(rtrim($data)); | ||
} | ||
|
||
} else { | ||
|
||
// only headers we wish to forward back to the client | ||
$forward = array('content-type', 'content-length', 'accept-ranges', 'content-range'); | ||
|
||
$parts = explode(':', $data, 2); | ||
|
||
if($headers_sent && count($parts) == 2 && in_array(trim(strtolower($parts[0])), $forward)){ | ||
header(rtrim($data)); | ||
} | ||
} | ||
|
||
return strlen($data); | ||
}); | ||
|
||
|
||
// if response is empty - this never gets called | ||
curl_setopt($ch, CURLOPT_WRITEFUNCTION, function($curl, $data) use (&$headers_sent){ | ||
|
||
if($headers_sent){ | ||
echo $data; | ||
flush(); | ||
} | ||
|
||
return strlen($data); | ||
}); | ||
|
||
$ret = @curl_exec($ch); | ||
curl_close($ch); | ||
|
||
// if we are still here by now, return status_code | ||
return true; | ||
} | ||
|
||
// extract youtube video_id from any piece of text | ||
public function extractId($str){ | ||
|
||
if(preg_match('/[a-z0-9_-]{11}/i', $str, $matches)){ | ||
return $matches[0]; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
// selector by format: mp4 360, | ||
private function selectFirst($links, $selector){ | ||
|
||
$result = array(); | ||
$formats = preg_split('/\s*,\s*/', $selector); | ||
|
||
// has to be in this order | ||
foreach($formats as $f){ | ||
|
||
foreach($links as $l){ | ||
|
||
if(stripos($l['format'], $f) !== false || $f == 'any'){ | ||
$result[] = $l; | ||
} | ||
} | ||
} | ||
|
||
return $result; | ||
} | ||
|
||
// options | deep_links | append_redirector | ||
public function getDownloadLinks($id, $selector = false){ | ||
|
||
$result = array(); | ||
$instructions = array(); | ||
|
||
// you can input HTML of /watch? page directory instead of id | ||
if(strpos($id, '<div id="player') !== false){ | ||
$html = $id; | ||
} else { | ||
$video_id = $this->extractId($id); | ||
|
||
if(!$video_id){ | ||
return false; | ||
} | ||
|
||
$html = $this->curl("https://www.youtube.com/watch?v={$video_id}"); | ||
} | ||
|
||
// age-gate | ||
if(strpos($html, 'player-age-gate-content') !== false){ | ||
// nothing you can do folks... | ||
return false; | ||
} | ||
|
||
// http://stackoverflow.com/questions/35608686/how-can-i-get-the-actual-video-url-of-a-youtube-live-stream | ||
if(preg_match('@url_encoded_fmt_stream_map["\']:\s*["\']([^"\'\s]*)@', $html, $matches)){ | ||
|
||
$parts = explode(",", $matches[1]); | ||
|
||
foreach($parts as $p){ | ||
$query = str_replace('\u0026', '&', $p); | ||
parse_str($query, $arr); | ||
|
||
$url = $arr['url']; | ||
|
||
if(isset($arr['sig'])){ | ||
$url = $url.'&signature='.$arr['sig']; | ||
|
||
} else if(isset($arr['signature'])){ | ||
$url = $url.'&signature='.$arr['signature']; | ||
|
||
} else if(isset($arr['s'])){ | ||
|
||
// this is probably a VEVO/ads video... signature must be decrypted first! We need instructions for doing that | ||
if(count($instructions) == 0){ | ||
$instructions = (array)$this->getInstructions($html); | ||
} | ||
|
||
$dec = $this->sig_decipher($arr['s'], $instructions); | ||
$url = $url.'&signature='.$dec; | ||
} | ||
|
||
// redirector.googlevideo.com | ||
//$url = preg_replace('@(\/\/)[^\.]+(\.googlevideo\.com)@', '$1redirector$2', $url); | ||
|
||
$itag = $arr['itag']; | ||
$format = isset($this->itag_info[$itag]) ? $this->itag_info[$itag] : 'Unknown'; | ||
|
||
$result[$itag] = array( | ||
'url' => $url, | ||
'format' => $format | ||
); | ||
} | ||
} | ||
|
||
// do we want all links or just select few? | ||
if($selector){ | ||
return $this->selectFirst($result, $selector); | ||
} | ||
|
||
return $result; | ||
} | ||
|
||
private function sig_decipher($signature, $instructions){ | ||
|
||
foreach($instructions as $opt){ | ||
|
||
$command = $opt[0]; | ||
$value = $opt[1]; | ||
|
||
if($command == 'swap'){ | ||
|
||
$temp = $signature[0]; | ||
$signature[0] = $signature[$value % strlen($signature)]; | ||
$signature[$value] = $temp; | ||
|
||
} else if($command == 'splice'){ | ||
$signature = substr($signature, $value); | ||
} else if($command == 'reverse'){ | ||
$signature = strrev($signature); | ||
} | ||
} | ||
|
||
return trim($signature); | ||
} | ||
} | ||
|
||
|
||
?> |