Skip to content

Commit f03700d

Browse files
authored
Merge pull request #1 from Chenjinyi/devel
Add Pixabay Bing Artstation Spider 添加 Pixabay爬虫 添加 bing今日壁纸爬虫(源自chenjinyi/Image-Spider)改进 添加 Artstation爬虫 (源自chenjinyi/Image-Spider)重构 下一版本添加bilibili爬虫 (源自chenjinyi/Image-Spider)
2 parents aaaec0e + c54097e commit f03700d

File tree

11 files changed

+501
-192
lines changed

11 files changed

+501
-192
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
Api/
2+
Api/*
23
Resource/
4+
Resource/*
35
.DS_Store/
6+
.DS_Store
47
.idea/

Config.php

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,42 @@
55
* Date: 2018/7/31
66
* Time: 下午8:53
77
*/
8+
9+
date_default_timezone_set('PRC'); //设置时区
810
//定义目录名
9-
define("API_PATH","Api"); //存放API Key文件夹
10-
define("FILE_PATH","Resource"); //存放文件文件夹名
11+
define("API_PATH", "Api"); //存放API Key文件夹
12+
define("FILE_PATH", "Resource"); //存放文件文件夹名
1113

1214
//网站API链接
13-
define("PIXABAY_API_URL","https://pixabay.com/api/");
15+
define("PIXABAY_API_URL", "https://pixabay.com/api/");
16+
17+
//是否记录图片链接到数据库
18+
define("SAVE_IMAGES_URL_DATABASE", "false");
19+
20+
//数据库连接
21+
define("DATABASE_URL", "localhost");
22+
define("DATABASE_USERNAME", "root");
23+
define("DATABASE_PASSWORD", "");
24+
define("DATABASE_DATABASE", "images");
25+
26+
define("DATE_FORMAT", "n-d");
27+
//随机搜索关键词
28+
define("RAND_KEYWORD", array(
29+
'Coffee',
30+
'OverWatch',
31+
'Magic',
32+
'Red',
33+
'天使',
34+
'Franary',
35+
"cos",
36+
"lolita",
37+
'次元',
38+
'tea',
39+
'women',
40+
'jk',
41+
'game',
42+
));
43+
44+
//artstation 每执行一次循环后休眠一段时间
45+
define("ARTSTATION_SLEEP","true");
46+
define("ARTSTATION_SLEEP_TIME","180");

Readme.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# PHP-Images-Spider
2+
3+
> 继 Image-Spider 之后的又一个图片爬虫
4+
5+
[Chenjinyi](https://github.com/Chenjinyi)/[Image-spider](https://github.com/Chenjinyi/Image-spider) 使用方法可以先参考
6+
7+
目前处于开发阶段,Readme 稍后再补充
8+
9+
比原来的功能更强
10+
11+
Email: chenjinyi666@gmail.com
12+
### 能爬取的网站:
13+
14+
bilibili-相册
15+
16+
pixabay
17+
18+
artstation
19+
20+
~~bcy~~

Spider.php

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,56 @@
55
* Date: 2018/7/31
66
* Time: 下午9:01
77
*/
8-
9-
require_once "src/PublicCore.php";
8+
//引用文件
109
require_once "Config.php";
10+
require_once "Src/DbCore.php";
11+
require_once "Src/PublicCore.php";
1112

1213
//遍历目录文件
13-
function print_dir($dir_path)
14-
{
15-
$files = array();
16-
if (@$handle = opendir($dir_path)) { //注意这里要加一个@,不然会有warning错误提示:)
17-
while (($file = readdir($handle)) !== false) {
18-
if ($file != ".." && $file != ".") { //排除根目录;
19-
$files[] = $file;
20-
}
21-
}
22-
closedir($handle);
23-
return $files;
24-
}
25-
}
26-
27-
$dir = print_dir('src' . DIRECTORY_SEPARATOR . 'spider');
28-
14+
$spiderCore = new PublicCore();
15+
$dir = $spiderCore->print_dir('Src' . DIRECTORY_SEPARATOR . 'Spider');
2916

3017
//输出可以选择的爬虫
31-
$spider = "";
32-
foreach ($dir as $key => $value) {
33-
$spider .= PHP_EOL . $key . " : " . $value . PHP_EOL;
34-
}
18+
$spider = $spiderCore->print_menu($dir);
3519
$print = "
36-
=============================
37-
PHP Images Spider".
38-
PHP_EOL
39-
. $spider .
40-
PHP_EOL.
41-
"Chenjinyi:https://github.com/Chenjinyi
42-
=============================
43-
" . PHP_EOL . "请输入你选择的爬虫:";
20+
\e[33m
21+
_ooOoo_
22+
o8888888o
23+
88\" . \"88
24+
(| -_- |)
25+
O\ = /O
26+
____/`---'\____
27+
.' \\| |// `.
28+
/ \\||| : |||// \
29+
/ _||||| -:- |||||_ \
30+
| | \\\ - /'| | |
31+
| \_| `\`---'// |_/ |
32+
\ .-\__ `-. -'__/-. /
33+
___`. .' /--.--\ `. .'___
34+
.\"\" '< `.___\_<|>_/___.' _> \\
35+
| | : `- \`. ;`. _/; .'/ / .' ; |
36+
\ \ `-. \_\_`. _.'_/_/ -' _.' /
37+
===`-.`___`-.__\ \___ /__.-'_.'_.-'===
38+
`=--=-'
39+
40+
\e[0m
41+
====================================================
42+
\033[33m PHP Images Spider \033[0m" .
43+
"\033[34m".$spiderCore->eol($spider)."\033[0m".
44+
"
45+
\033[33m Chenjinyi:https://github.com/Chenjinyi \033[0m
46+
====================================================
47+
" . PHP_EOL .
48+
"请输入你选择的爬虫: ";
4449
print_r($print);
4550
$input = trim(fgets(STDIN));
4651

52+
$t1 = microtime(true);//记录运行时间
4753

4854
//使用爬虫
49-
$spider_path = 'src' . DIRECTORY_SEPARATOR . 'spider/';
50-
empty($dir[$input]) ? die('参数错误') : include_once $spider_path . $dir[$input];
55+
$spider_path = 'Src' . DIRECTORY_SEPARATOR . 'Spider/';
56+
empty($dir[$input]) ? die(PHP_EOL . '参数错误') : include_once $spider_path . $dir[$input];
57+
58+
$t2 = microtime(true);//记录运行结束时间
59+
60+
print_r(PHP_EOL . '耗时' . round($t2 - $t1, 3) . "");//输入运行时间

Src/DbCore.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?php
2+
/**
3+
* Created by PhpStorm.
4+
* User: jinyi
5+
* Date: 2018/8/1
6+
* Time: 上午11:00
7+
*/
8+
9+
/**
 * Placeholder class: it currently declares no properties or methods.
 *
 * NOTE(review): presumably intended to hold the database access code
 * (judging by its name only) — confirm before relying on it.
 */
class DbCore
10+
{
11+
12+
}

Src/PublicCore.php

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
<?php
2+
/**
3+
* Created by PhpStorm.
4+
* User: jinyi
5+
* Date: 2018/7/31
6+
* Time: 下午8:39
7+
*/
8+
9+
10+
/**
 * Shared helpers used by the spiders: HTTP fetching, directory handling,
 * console prompts and image downloading.
 *
 * Several methods rely on constants that are defined outside this class
 * (FILE_PATH, API_PATH, DATE_FORMAT).
 */
class PublicCore
{
    /**
     * Perform an HTTP GET request with cURL and return the response body.
     *
     * The script is terminated (after printing a message) when the transfer
     * fails or the response body is empty.
     *
     * @param string       $url        URL to request.
     * @param array|string $user_agent Request header lines passed to
     *                                 CURLOPT_HTTPHEADER. A single string is
     *                                 treated as one header line.
     * @return string Response body.
     */
    public function curl_get($url, $user_agent = [])
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // CURLOPT_HTTPHEADER only accepts an array of header lines, so a
        // plain string (or null) is normalised to an array first.
        curl_setopt($ch, CURLOPT_HTTPHEADER, (array) $user_agent);
        // Make curl_exec() return the body instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        $result = curl_exec($ch);
        // Compare explicitly: empty() would also reject a valid body of "0".
        if ($result === false || $result === '') {
            print_r('无法连接' . $url);
            die();
        }
        return $result;
    }

    /**
     * Build the download folder path for a keyword and make sure it exists.
     *
     * The path is FILE_PATH/<date(DATE_FORMAT)>-<$string>.
     *
     * @param string $string Suffix appended after the date.
     * @return string The folder path.
     */
    public function new_dir_name($string)
    {
        $path = FILE_PATH . DIRECTORY_SEPARATOR . date(DATE_FORMAT) . "-" . $string;
        $this->dir_create($path);
        return $path;
    }

    /**
     * Download images one after another (single-threaded on purpose: the
     * original author notes that a multi-threaded version sends requests
     * too frequently).
     *
     * Files that already exist in the target folder are skipped.
     *
     * @param array  $file_url List of arrays, each mapping file name => download URL.
     * @param string $dir_name Folder the files are written to.
     * @return void
     */
    public function image_save($file_url, $dir_name)
    {
        foreach ($file_url as $images) {
            foreach ($images as $key => $value) {
                print_r($key . PHP_EOL);
                $target = $dir_name . DIRECTORY_SEPARATOR . $key;

                if (file_exists($target)) {
                    echo "已存在" . PHP_EOL;
                    continue;
                }

                $image_save = file_get_contents($value);
                if ($image_save === false || $image_save === '') {
                    print_r("下载错误:" . $value);
                    continue;
                }

                // Report a failed write instead of silently discarding it.
                if (@file_put_contents($target, $image_save) === false) {
                    print_r("保存失败:" . $target . PHP_EOL);
                }
            }
        }
    }

    /**
     * Create a folder (including missing parents) if it does not exist yet.
     *
     * @param string $dir_name Folder path.
     * @return void
     */
    public function dir_create($dir_name)
    {
        if (is_dir($dir_name)) {
            return;
        }
        // Re-check is_dir() after a failed mkdir(): the folder may have been
        // created by another process between the check and the call.
        if (!@mkdir($dir_name, 0777, true) && !is_dir($dir_name)) {
            print_r("无法创建文件夹:" . $dir_name . PHP_EOL);
        }
    }

    /**
     * Print a prompt, read one line from STDIN and return it.
     *
     * When the user enters nothing (or STDIN is at end of file) the default
     * is echoed and returned. An answer of "0" is a real answer and is
     * returned as such.
     *
     * @param string $string  Prompt text.
     * @param mixed  $default Value returned when no input is given.
     * @return mixed The trimmed input, or $default.
     */
    public function user_input($string, $default)
    {
        print_r($string);
        $line = fgets(STDIN);
        $input = ($line === false) ? '' : trim($line);

        if ($input === '') {
            print_r($default . PHP_EOL);
            return $default;
        }
        print_r($input . PHP_EOL);
        return $input;
    }

    /**
     * Create the API-key folder and the download folder.
     *
     * @return void
     */
    public function init_dir()
    {
        $this->dir_create(API_PATH);
        $this->dir_create(FILE_PATH);
    }

    /**
     * List the entries of a folder, excluding "." and "..".
     *
     * Entries are returned in the order readdir() yields them.
     *
     * @param string $dir_path Folder to read.
     * @return array Entry names; empty when the folder cannot be opened.
     */
    public function print_dir($dir_path)
    {
        $files = [];
        $handle = is_dir($dir_path) ? @opendir($dir_path) : false;
        if ($handle === false) {
            // Always return an array so callers can count()/iterate safely.
            return $files;
        }

        while (($file = readdir($handle)) !== false) {
            if ($file !== "." && $file !== "..") {
                $files[] = $file;
            }
        }
        closedir($handle);
        return $files;
    }

    /**
     * Read an API-key file from API_PATH, creating an empty one if missing.
     *
     * @param string $filename File name inside API_PATH.
     * @return bool|string File contents, or false when the file is empty
     *                     or cannot be created/read.
     */
    public function check_api_file($filename)
    {
        // touch() fails when the folder itself does not exist yet.
        $this->dir_create(API_PATH);

        $file_path = API_PATH . DIRECTORY_SEPARATOR . $filename;
        if (!file_exists($file_path) && !touch($file_path)) {
            return false;
        }

        $file = file_get_contents($file_path);
        if ($file !== false && $file !== '') {
            return $file;
        }
        return false;
    }

    /**
     * Wrap a string in line breaks.
     *
     * @param string $string
     * @return string PHP_EOL . $string . PHP_EOL
     */
    public function eol($string)
    {
        return PHP_EOL . $string . PHP_EOL;
    }

    /**
     * Count the entries in a folder.
     *
     * @param string $dir Folder path.
     * @return int Number of entries (0 when the folder cannot be opened).
     */
    public function images_number($dir)
    {
        return count($this->print_dir($dir));
    }

    /**
     * Append an image extension to a file name based on the image URL.
     *
     * Only jpg and png are recognised. The extension of the URL path is
     * checked first; if that is inconclusive the whole URL is searched for
     * "jpg"/"png" (e.g. "?format=jpg"). Anything else gets ".jpeg".
     *
     * @param string $image_url Image URL.
     * @param string $file_name File name without extension.
     * @return string File name with extension.
     */
    public function image_url_format($image_url, $file_name)
    {
        $path = parse_url($image_url, PHP_URL_PATH);
        $extension = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
        if ($extension === 'jpg' || $extension === 'png') {
            return $file_name . '.' . $extension;
        }

        if (strpos($image_url, "jpg") !== false) {
            return $file_name . ".jpg";
        }
        if (strpos($image_url, "png") !== false) {
            return $file_name . ".png";
        }

        // Unknown format: only add the default extension. The URL must not
        // be appended, it contains characters that are invalid in file names.
        return $file_name . ".jpeg";
    }

    /**
     * Convenience wrapper: create the target folder, download the images
     * into it and print how many entries the folder now holds.
     *
     * @param string $string     Folder name suffix (see new_dir_name()).
     * @param array  $images_arr Images to download (see image_save()).
     * @return void
     */
    public function quick_down_img($string, $images_arr)
    {
        $dir_path = $this->new_dir_name($string);
        $this->image_save($images_arr, $dir_path);
        print_r("文件夹现在有:" . $this->images_number($dir_path) . "张图片");
    }

    /**
     * Render the selection menu, one "<key> : <value>" entry per item.
     *
     * @param array $spider Menu entries.
     * @return string The menu text.
     */
    public function print_menu(array $spider)
    {
        $result = "";
        foreach ($spider as $key => $value) {
            $result .= PHP_EOL . $key . " : " . $value . PHP_EOL;
        }
        return $result;
    }
}

0 commit comments

Comments
 (0)