Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
Api/
Api/*
Resource/
Resource/*
.DS_Store/
.DS_Store
.idea/
39 changes: 36 additions & 3 deletions Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,42 @@
* Date: 2018/7/31
* Time: 下午8:53
*/

date_default_timezone_set('PRC'); //set the default timezone
//directory names
// NOTE(review): API_PATH and FILE_PATH are each defined twice below (this looks like the old and
// new lines of a diff kept side by side). define() does not overwrite an existing constant, so
// the first definition wins and the repeated call only raises a diagnostic.
define("API_PATH","Api"); //folder that stores the API key files
define("FILE_PATH","Resource"); //name of the folder that stores files
define("API_PATH", "Api"); //folder that stores the API key files
define("FILE_PATH", "Resource"); //name of the folder that stores files

//site API URLs
// NOTE(review): PIXABAY_API_URL is also defined twice with the same value.
define("PIXABAY_API_URL","https://pixabay.com/api/");
define("PIXABAY_API_URL", "https://pixabay.com/api/");

//whether to record image URLs in the database
// NOTE(review): the value is the string "false", which is truthy in PHP — confirm that
// consumers compare against the string rather than testing it as a boolean.
define("SAVE_IMAGES_URL_DATABASE", "false");

//database connection settings
define("DATABASE_URL", "localhost");
define("DATABASE_USERNAME", "root");
define("DATABASE_PASSWORD", "");
define("DATABASE_DATABASE", "images");

// date() format string: month without leading zero, a dash, then two-digit day
define("DATE_FORMAT", "n-d");
//keywords used for random searches
define("RAND_KEYWORD", array(
'Coffee',
'OverWatch',
'Magic',
'Red',
'天使',
'Franary',
"cos",
"lolita",
'次元',
'tea',
'women',
'jk',
'game',
));

//ArtStation: sleep for a while after each loop iteration
// NOTE(review): both values are strings ("true", "180"), not a bool/int — confirm how the
// ArtStation spider interprets them.
define("ARTSTATION_SLEEP","true");
define("ARTSTATION_SLEEP_TIME","180");
20 changes: 20 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# PHP-Images-Spider

> 继 Image-Spider 之后的又一个图片爬虫

[Chenjinyi](https://github.com/Chenjinyi)/[Image-spider](https://github.com/Chenjinyi/Image-spider) 使用方法可以先参考

目前处于开发阶段,Readme 稍后再补充

比原来的功能更强

Email: chenjinyi666@gmail.com
### 能爬取的网站:

bilibili-相册

pixabay

artstation

~~bcy~~
74 changes: 42 additions & 32 deletions Spider.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,46 +5,56 @@
* Date: 2018/7/31
* Time: 下午9:01
*/

require_once "src/PublicCore.php";
//引用文件
require_once "Config.php";
require_once "Src/DbCore.php";
require_once "Src/PublicCore.php";

/**
 * List the entries of a directory.
 *
 * The special entries "." and ".." are skipped. Entries are returned in the
 * order readdir() yields them (no sorting is applied).
 *
 * @param string $dir_path Path of the directory to read.
 * @return array List of entry names; an empty array when the path is not a
 *               readable directory, so callers can always iterate the result.
 */
function print_dir($dir_path)
{
    $files = array();

    // Check up front instead of silencing opendir() with "@": a missing or
    // unreadable directory is an expected situation, not an error.
    if (!is_dir($dir_path) || !is_readable($dir_path)) {
        return $files;
    }

    $handle = opendir($dir_path);
    if ($handle === false) {
        return $files;
    }

    while (($file = readdir($handle)) !== false) {
        // Skip the current-directory and parent-directory pseudo entries.
        if ($file !== '.' && $file !== '..') {
            $files[] = $file;
        }
    }
    closedir($handle);

    return $files;
}

// NOTE(review): this section contains two generations of the same logic side by side (the
// procedural print_dir()/foreach version and the PublicCore-based version); the later
// assignments to $dir and $spider overwrite the earlier ones.
$dir = print_dir('src' . DIRECTORY_SEPARATOR . 'spider');

$spiderCore = new PublicCore();
$dir = $spiderCore->print_dir('Src' . DIRECTORY_SEPARATOR . 'Spider');

//build the list of spiders the user can choose from ("index : file name")
$spider = "";
foreach ($dir as $key => $value) {
$spider .= PHP_EOL . $key . " : " . $value . PHP_EOL;
}
$spider = $spiderCore->print_menu($dir);
// Banner and prompt text shown before the user's choice is read; it embeds the spider menu
// ($spider) and uses ANSI escape sequences (\e[..m / \033[..m) for colour.
// NOTE(review): the old and the new version of this literal appear interleaved below.
$print = "
=============================
PHP Images Spider".
PHP_EOL
. $spider .
PHP_EOL.
"Chenjinyi:https://github.com/Chenjinyi
=============================
" . PHP_EOL . "请输入你选择的爬虫:";
\e[33m
_ooOoo_
o8888888o
88\" . \"88
(| -_- |)
O\ = /O
____/`---'\____
.' \\| |// `.
/ \\||| : |||// \
/ _||||| -:- |||||_ \
| | \\\ - /'| | |
| \_| `\`---'// |_/ |
\ .-\__ `-. -'__/-. /
___`. .' /--.--\ `. .'___
.\"\" '< `.___\_<|>_/___.' _> \\
| | : `- \`. ;`. _/; .'/ / .' ; |
\ \ `-. \_\_`. _.'_/_/ -' _.' /
===`-.`___`-.__\ \___ /__.-'_.'_.-'===
`=--=-'

\e[0m
====================================================
\033[33m PHP Images Spider \033[0m" .
"\033[34m".$spiderCore->eol($spider)."\033[0m".
"
\033[33m Chenjinyi:https://github.com/Chenjinyi \033[0m
====================================================
" . PHP_EOL .
"请输入你选择的爬虫: ";
// Show the banner/menu, then read the user's choice (an index into $dir) from standard input.
print_r($print);
$input = trim(fgets(STDIN));

$t1 = microtime(true);//record the start time

//run the selected spider
// NOTE(review): the lookup/include pair appears twice (lower-case 'src/spider/' and
// 'Src/Spider/' paths); include_once is keyed by resolved path, so on a case-sensitive
// filesystem these are different files — confirm which variant is intended.
// empty() also rejects an unknown index, so an invalid choice terminates the script.
$spider_path = 'src' . DIRECTORY_SEPARATOR . 'spider/';
empty($dir[$input]) ? die('参数错误') : include_once $spider_path . $dir[$input];
$spider_path = 'Src' . DIRECTORY_SEPARATOR . 'Spider/';
empty($dir[$input]) ? die(PHP_EOL . '参数错误') : include_once $spider_path . $dir[$input];

$t2 = microtime(true);//record the end time

print_r(PHP_EOL . '耗时' . round($t2 - $t1, 3) . "秒");//print the elapsed time in seconds
12 changes: 12 additions & 0 deletions Src/DbCore.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php
/**
* Created by PhpStorm.
* User: jinyi
* Date: 2018/8/1
* Time: 上午11:00
*/

/**
 * Empty class: it currently declares no properties or methods.
 *
 * NOTE(review): presumably a placeholder for database access code — confirm
 * the intended responsibilities before relying on it.
 */
class DbCore
{

}
203 changes: 203 additions & 0 deletions Src/PublicCore.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
<?php
/**
* Created by PhpStorm.
* User: jinyi
* Date: 2018/7/31
* Time: 下午8:39
*/


/**
 * Shared helper methods used by the individual spiders: HTTP fetching,
 * directory handling, console input and image downloading.
 *
 * Several methods read the constants API_PATH, FILE_PATH and DATE_FORMAT,
 * which must be defined before those methods are called.
 */
class PublicCore
{
    /**
     * Perform an HTTP GET request with cURL and return the response body.
     *
     * Terminates the script (after printing a message) when the request
     * fails or the response body is empty.
     *
     * @param string       $url        URL to request.
     * @param array|string $user_agent Request header line(s) passed to
     *                                 CURLOPT_HTTPHEADER; a single string is
     *                                 wrapped into a one-element array.
     * @return mixed Response body.
     */
    public function curl_get($url, $user_agent)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // CURLOPT_HTTPHEADER requires an array of header lines.
        curl_setopt($ch, CURLOPT_HTTPHEADER, (array) $user_agent);
        // Return the response from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        $result = curl_exec($ch);
        if (empty($result)) {
            print_r('无法连接' . $url);
            die();
        }

        return $result;
    }

    /**
     * Build the download directory path for a label, create the directory
     * and return the path.
     *
     * The path is FILE_PATH/<date(DATE_FORMAT)>-<label>.
     *
     * @param string $string Label appended to the dated directory name.
     * @return string Path of the (now existing) directory.
     */
    public function new_dir_name($string)
    {
        $path = FILE_PATH . DIRECTORY_SEPARATOR . date(DATE_FORMAT) . "-" . $string;
        $this->dir_create($path);

        return $path;
    }

    /**
     * Download images one after another (sequentially, to avoid sending
     * requests too frequently).
     *
     * Files that already exist in the target directory are skipped.
     *
     * @param array  $file_url List of arrays, each mapping file name => download URL.
     * @param string $dir_name Directory the files are saved into.
     */
    public function image_save($file_url, $dir_name)
    {
        foreach ($file_url as $images) {
            foreach ($images as $key => $value) {
                print_r($key . PHP_EOL);

                $target = $dir_name . DIRECTORY_SEPARATOR . $key;
                if (file_exists($target)) {
                    echo "已存在" . PHP_EOL;
                    continue;
                }

                $image_save = file_get_contents($value);
                if (!$image_save) {
                    print_r("下载错误:" . $value);
                    continue;
                }

                // The warning is suppressed because a failed write is
                // reported explicitly right below instead of being ignored.
                if (@file_put_contents($target, $image_save) === false) {
                    print_r("保存失败:" . $target . PHP_EOL);
                }
            }
        }
    }

    /**
     * Create a directory (including missing parents) unless the path
     * already exists.
     *
     * @param string $dir_name Directory path.
     */
    public function dir_create($dir_name)
    {
        if (!file_exists($dir_name)) {
            mkdir($dir_name, 0777, true);
        }
    }

    /**
     * Print a prompt, read one line from standard input and echo the
     * effective value.
     *
     * @param string $string  Prompt text.
     * @param mixed  $default Value used when the user enters nothing.
     * @return mixed The trimmed input, or $default when the input is blank
     *               or standard input is at end-of-file.
     */
    public function user_input($string, $default)
    {
        print_r($string);

        $line = fgets(STDIN);
        $input = ($line === false) ? '' : trim($line);

        // Compare against '' rather than using empty(), so that the valid
        // answer "0" is not replaced by the default.
        if ($input === '') {
            print_r($default . PHP_EOL);
            return $default;
        }

        print_r($input . PHP_EOL);
        return $input;
    }

    /**
     * Create the API key directory and the download directory if missing.
     */
    public function init_dir()
    {
        $this->dir_create(API_PATH);
        $this->dir_create(FILE_PATH);
    }

    /**
     * List the entries of a directory, excluding "." and "..".
     *
     * Entries are returned in the order readdir() yields them.
     *
     * @param string $dir_path Directory to read.
     * @return array Entry names; an empty array when the path is not a
     *               readable directory, so the result is always countable
     *               and iterable.
     */
    public function print_dir($dir_path)
    {
        $files = array();

        if (!is_dir($dir_path) || !is_readable($dir_path)) {
            return $files;
        }

        $handle = opendir($dir_path);
        if ($handle === false) {
            return $files;
        }

        while (($file = readdir($handle)) !== false) {
            if ($file !== '.' && $file !== '..') {
                $files[] = $file;
            }
        }
        closedir($handle);

        return $files;
    }

    /**
     * Read an API key file from API_PATH, creating an empty file (and the
     * directory) when it does not exist yet.
     *
     * @param string $filename File name inside API_PATH.
     * @return bool|string File contents, or false when the file is empty
     *                     or cannot be read.
     */
    public function check_api_file($filename)
    {
        // touch() fails when the directory is missing, so create it first.
        $this->dir_create(API_PATH);

        $file_path = API_PATH . DIRECTORY_SEPARATOR . $filename;
        if (!file_exists($file_path)) {
            touch($file_path);
        }

        $file = file_get_contents($file_path);
        if ($file === false || $file === '') {
            return false;
        }

        return $file;
    }

    /**
     * Wrap a string in line breaks.
     *
     * @param string $string Text to wrap.
     * @return string PHP_EOL . $string . PHP_EOL
     */
    public function eol($string)
    {
        return PHP_EOL . $string . PHP_EOL;
    }

    /**
     * Count the entries in a directory.
     *
     * @param string $dir Directory path.
     * @return int Number of entries (0 for a missing or unreadable directory).
     */
    public function images_number($dir)
    {
        return count($this->print_dir($dir));
    }

    /**
     * Append an image extension to a file name, derived from the image URL.
     *
     * The extension of the URL path is checked first (case-insensitively).
     * If that is neither jpg nor png, the whole URL is searched for "jpg"
     * and then "png". When nothing matches, ".jpeg" is used.
     *
     * @param string $image_url URL of the image.
     * @param string $file_name File name without extension.
     * @return string File name with ".jpg", ".png" or ".jpeg" appended.
     */
    public function image_url_format($image_url, $file_name)
    {
        $path = parse_url($image_url, PHP_URL_PATH);
        $extension = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';

        if ($extension === 'jpg' || $extension === 'png') {
            return $file_name . '.' . $extension;
        }

        // Fall back to a substring search for URLs that carry the format
        // elsewhere, e.g. in the query string.
        if (stristr($image_url, 'jpg') !== false) {
            return $file_name . '.jpg';
        }
        if (stristr($image_url, 'png') !== false) {
            return $file_name . '.png';
        }

        // Unknown format: only the extension is appended. The URL itself
        // must not end up in the file name (it contains path separators).
        return $file_name . '.jpeg';
    }

    /**
     * Convenience wrapper: create the download directory for a label,
     * download the images into it and print how many files it now holds.
     *
     * @param string $string     Label used for the directory name.
     * @param array  $images_arr List of arrays mapping file name => download URL.
     */
    public function quick_down_img($string, $images_arr)
    {
        $dir_path = $this->new_dir_name($string);
        $this->image_save($images_arr, $dir_path);
        print_r("文件夹现在有:" . $this->images_number($dir_path) . "张图片");
    }

    /**
     * Render the spider selection menu.
     *
     * @param array $spider List of spider file names, keyed by menu index.
     * @return string One "index : name" entry per spider, each surrounded
     *                by line breaks.
     */
    public function print_menu(array $spider)
    {
        $result = "";
        foreach ($spider as $key => $value) {
            $result .= PHP_EOL . $key . " : " . $value . PHP_EOL;
        }

        return $result;
    }
}
Loading