Skip to content

Commit 9ea45ba

Browse files
committed
Add log & Edit Spider
1 parent b5a6ec6 commit 9ea45ba

File tree

8 files changed

+143
-95
lines changed

8 files changed

+143
-95
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ Resource/
44
Resource/*
55
.DS_Store/
66
.DS_Store
7-
.idea/
7+
.idea/
8+
Log/

Config.php

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,14 @@
1010
// Directory names
1111
define("API_PATH", "Api"); // folder that holds the API key files
1212
define("FILE_PATH", "Resource"); // name of the folder that stores files
13-
13+
define("LOG_PATH", 'Log');// name of the folder that stores log files
1414

1515
// Site API URL
1616
define("PIXABAY_API_URL", "https://pixabay.com/api/");
1717

18-
// Whether to record image links in the database
19-
define("SAVE_IMAGES_URL_DATABASE", "false");
2018

21-
// Database connection
22-
define("DATABASE_URL", "localhost");
23-
define("DATABASE_USERNAME", "root");
24-
define("DATABASE_PASSWORD", "");
25-
define("DATABASE_DATABASE", "images");
2619

27-
define("DATE_FORMAT", "n-d");
20+
define("DATE_FORMAT", "y");// date() format string; create_log() uses it as the log file-name prefix
2821
//随机搜索关键词
2922
define("RAND_KEYWORD", array(
3023
'Coffee',
@@ -42,18 +35,25 @@
4235
'jk',
4336
'game',
4437
));
38+
// General wait settings: default bounds for PublicCore::spider_wait().
// The values are handed to mt_rand() and sleep(), so they are seconds.
define("SPIDERWAIT_TIME_MIN", "1"); // shortest wait
define('SPIDERWAIT_TIME_MAX', "10"); // longest wait

// NOTE(review): the on/off switches below are strings. A non-empty string such
// as "false" is truthy in a plain `if`, so consumers must parse the value
// (e.g. filter_var(..., FILTER_VALIDATE_BOOLEAN)) rather than test it directly.

// Artstation: take a nap after every loop iteration.
define("ARTSTATION_SLEEP", "true");
// MIN must not exceed MAX (mt_rand() rejects an inverted range); the two
// values were previously swapped ("20" / "10").
define("ARTSTATION_SLEEP_TIME_MIN", "10"); // shortest wait
define("ARTSTATION_SLEEP_TIME_MAX", "20"); // longest wait

// Bilibili: pause after every loop iteration.
define("BILIBILI_SLEEP", "true");
define("BILIBILI_SLEEP_TIME_MIN", "10"); // shortest wait
define("BILIBILI_SLEEP_TIME_MAX", "40"); // longest wait


// Database connection
define("SAVE_IMAGES_URL_DATABASE", "false"); // whether to record image links in the database
define("DB_MS", 'mysql'); // database type
define("DB_USERNAME", "root"); // database user name
define("DB_PASSWORD", ""); // database password
define("DB_ADDRESS", "127.0.0.1"); // database address
define("DB_NAME", "images"); // database name

define("SPIDER_LOG", 'true'); // whether to keep a crawl log (read by PublicCore::add_log())

Spider.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
\e[0m
4141
====================================================
4242
\033[33m PHP Images Spider \033[0m" .
43-
"\033[34m".$spiderCore->eol($spider)."\033[0m".
43+
"\033[34m" . $spiderCore->eol($spider) . "\033[0m" .
4444
"
4545
\033[33m Chenjinyi:https://github.com/Chenjinyi \033[0m
4646
====================================================

Src/PublicCore.php

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99

1010
class PublicCore
1111
{
12+
/**
 * Set up the helper.
 *
 * Construction has a side effect: it immediately delegates to init_dir()
 * to initialise the working directories.
 */
public function __construct()
{
    // Initialise the working directories.
    $this->init_dir();
}
16+
1217
/**
1318
* CURL GET请求
1419
* @param $url string 请求URL
@@ -29,6 +34,29 @@ public function curl_get($url, $user_agent)
2934
return $result;
3035
}
3136

37+
/**
 * Append one entry to a spider's log file.
 *
 * Does nothing when logging is switched off through the SPIDER_LOG constant.
 * The target file is resolved (and created if missing) by create_log().
 *
 * @param string      $spider_name   Spider name; becomes part of the log file name.
 * @param string      $data          Text appended verbatim to the log file.
 * @param string|null $filename_data Optional extra file-name segment.
 * @return void
 */
public function add_log($spider_name, $data, $filename_data = null)
{
    // SPIDER_LOG is configured as the string 'true' / 'false'. A bare
    // truthiness test treats 'false' as enabled, so parse it as a boolean
    // (this also accepts a real bool).
    if (!filter_var(SPIDER_LOG, FILTER_VALIDATE_BOOLEAN)) {
        return;
    }

    $filename = $this->create_log($spider_name, $filename_data);

    // Report a failed write instead of dropping the entry silently.
    if (file_put_contents($filename, $data, FILE_APPEND) === false) {
        print_r("日志写入失败:" . $filename . PHP_EOL);
    }
}
47+
48+
/**
 * Build the path of a spider's log file and create the file if it is missing.
 *
 * The file lives directly inside LOG_PATH and is named
 * "<date(DATE_FORMAT)>-<spider_name>-log", or
 * "<date(DATE_FORMAT)>-<spider_name>-<filename_data>-log" when an extra
 * segment is supplied.
 *
 * @param string      $spider_name   Spider name used in the file name.
 * @param string|null $filename_data Optional extra file-name segment.
 * @return string Path of the log file.
 */
public function create_log($spider_name, $filename_data)
{
    $segments = [date(DATE_FORMAT), $spider_name];

    // Only null / '' mean "no extra segment"; empty() would also discard "0".
    $extra = (string) $filename_data;
    if ($extra !== '') {
        $segments[] = $extra;
    }
    $segments[] = "log";

    // Segments may originate from user input (verify against callers); a path
    // separator inside one would point the file outside LOG_PATH or at a
    // directory that does not exist.
    $file_name = str_replace(['/', '\\'], '_', implode("-", $segments));
    $log_file = LOG_PATH . DIRECTORY_SEPARATOR . $file_name;

    // touch() cannot create a file inside a missing directory.
    if (!is_dir(LOG_PATH)) {
        mkdir(LOG_PATH, 0777, true);
    }
    if (!file_exists($log_file)) {
        touch($log_file);
    }

    return $log_file;
}
3260

3361
/**
3462
* 文件夹名
@@ -47,16 +75,17 @@ public function new_dir_name($string)
4775
* @param $file_url array array[文件名=下载链接]
4876
* @param $dir_name string 保存的文件夹
4977
*/
50-
public function image_save($file_url, $dir_name)
78+
public function image_save($file_url, $dir_name, $spider_name, $filename_data = null)
5179
{ //下载
5280
foreach ($file_url as $images) {
5381
foreach ($images as $key => $value) {
54-
print_r($key.PHP_EOL);
82+
print_r($key . PHP_EOL);
5583
if (file_exists($dir_name . DIRECTORY_SEPARATOR . $key)) {//检测是否存在
5684
echo "已存在" . PHP_EOL;
5785
continue;
5886
} else {
5987
if ($image_save = file_get_contents($value)) {
88+
$this->add_log($spider_name, $key . "=>" . $value . PHP_EOL, $filename_data);
6089
@file_put_contents($dir_name . DIRECTORY_SEPARATOR . $key, $image_save);
6190
} else {
6291
print_r("下载错误:" . $value);
@@ -87,21 +116,23 @@ public function user_input($string, $default)
87116
print_r($string);
88117
$input = trim(fgets(STDIN));
89118
if (empty($input)) {
90-
print_r($default.PHP_EOL);
119+
print_r($default . PHP_EOL);
91120
return $default;
92121
}
93-
print_r($input.PHP_EOL);
122+
print_r($input . PHP_EOL);
94123
return $input;
95124

96125
}
97126

98127
/**
 * Initialise the working directories.
 *
 * Passes each configured directory name (API_PATH, FILE_PATH, LOG_PATH),
 * in that order, to dir_create().
 */
public function init_dir()
{
    foreach ([API_PATH, FILE_PATH, LOG_PATH] as $directory) {
        $this->dir_create($directory);
    }
}
106137

107138
/**
@@ -166,36 +197,56 @@ public function images_number($dir)
166197
/**
 * Append an image extension to a file name, inferred from the image URL.
 *
 * Only JPG and PNG are told apart; anything else falls back to ".jpeg".
 *
 * @param string $image_url  URL the image is downloaded from.
 * @param string $file_name  File name without extension.
 * @return string $file_name with ".jpg", ".png" or ".jpeg" appended.
 */
public function image_url_format($image_url, $file_name)
{
    $image_url = (string) $image_url;

    // Prefer the real extension of the URL path, so "jpg"/"png" appearing
    // elsewhere in the URL (host name, query string) cannot win.
    $path = parse_url($image_url, PHP_URL_PATH);
    $extension = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
    if ($extension === 'jpg' || $extension === 'png') {
        return $file_name . '.' . $extension;
    }

    // No usable path extension: keep the original substring heuristic
    // (now case-insensitive), e.g. for ".../image?format=jpg".
    if (stripos($image_url, "jpg") !== false) {
        return $file_name . ".jpg";
    }
    if (stripos($image_url, "png") !== false) {
        return $file_name . ".png";
    }

    // Unknown format. The URL itself is no longer appended to the name: its
    // slashes and scheme made the result unusable as a file name.
    return $file_name . ".jpeg";
}
179211

212+
213+
/**
 * Pause for a random number of seconds between requests.
 *
 * @param bool|string $status Whether to pause. Accepts a real boolean as well
 *                            as the 'true' / 'false' strings used by the
 *                            *_SLEEP constants.
 * @param int|string  $min    Lower bound of the pause, in seconds.
 * @param int|string  $max    Upper bound of the pause, in seconds.
 * @return int Number of seconds slept (0 when pausing is disabled).
 */
public function spider_wait($status = true, $min = SPIDERWAIT_TIME_MIN, $max = SPIDERWAIT_TIME_MAX)
{
    // The config flags are strings; 'false' is truthy in a plain `if`.
    if (!filter_var($status, FILTER_VALIDATE_BOOLEAN)) {
        return 0;
    }

    // Normalise the bounds: sleep() rejects negative values and mt_rand()
    // rejects a range whose upper bound is below its lower bound.
    $first = max(0, (int) $min);
    $second = max(0, (int) $max);
    $lower = min($first, $second);
    $upper = max($first, $second);

    $num = mt_rand($lower, $upper);
    echo PHP_EOL . "爬累了,我要睡觉觉zzzzzzzzzzzzzzz" . PHP_EOL . "让我先睡" . $num . "s";
    sleep($num);

    return $num;
}
228+
180229
/**
 * Convenience wrapper around the download helpers.
 *
 * Resolves the save folder with new_dir_name(), downloads into it with
 * image_save(), then prints the image count reported by images_number().
 *
 * @param $string        Value handed to new_dir_name() to obtain the save path.
 * @param $images_arr    Images handed to image_save().
 * @param $spider_name   Spider name forwarded to image_save().
 * @param $filename_data Optional value forwarded to image_save().
 */
public function quick_down_img($string, $images_arr, $spider_name, $filename_data = null)
{
    // Resolve the save path.
    $target_dir = $this->new_dir_name($string);

    // Download the images.
    $this->image_save($images_arr, $target_dir, $spider_name, $filename_data);

    // Report how many images the folder holds now.
    $image_total = $this->images_number($target_dir);
    print_r("文件夹现在有:" . $image_total . "张图片");
}
190240

191241
/**
192242
* 输出菜单
193243
* @param array $spider
194244
* @return string 菜单
195245
*/
196-
public function print_menu(array $spider){
197-
$result="";
198-
foreach ($spider as $key=>$value){
246+
public function print_menu(array $spider)
247+
{
248+
$result = "";
249+
foreach ($spider as $key => $value) {
199250
$result .= PHP_EOL . $key . " : " . $value . PHP_EOL;
200251
}
201252
return $result;

Src/Spider/Artstation.php

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,22 @@ public function get_img_url($result, $spiderCore)
4848
}
4949

5050
//https://www.artstation.com/search/projects.json?direction=desc&order=likes_count&page=1&q=ne&show_pro_first=true
51+
5152
/**
5253
* 图片爬取下载
5354
* @param $spiderCore
5455
* @param $spider_name
5556
* @param $parm
5657
*/
57-
public function index_spider_core($spiderCore, $spider_name,$parm)
58+
public function index_spider_core($spiderCore, $spider_name, $parm)
5859
{
5960
$posts_num = $spiderCore->user_input("请输入爬取页数(1页=50个作品)(默认为:1):", 1);
6061
for ($start_num = 1; $start_num <= $posts_num; $start_num++) {
61-
$url = "https://www.artstation.com/projects.json?page=" . $start_num.$parm;
62+
$url = "https://www.artstation.com/projects.json?page=" . $start_num . $parm;
6263
$result = $spiderCore->curl_get($url, $this->userAgent);
6364
$result = json_decode($result);
6465
$images_arr = $this->get_img_url($result, $spiderCore);
65-
$spiderCore->quick_down_img($this->spider_name . "-" . $spider_name, $images_arr);
66+
$spiderCore->quick_down_img($this->spider_name . "-" . $spider_name, $images_arr,"Artstation");
6667
$this->artatstion_sleep();//休息一下
6768
}
6869

@@ -74,30 +75,20 @@ public function index_spider_core($spiderCore, $spider_name,$parm)
7475
* @param $spider_name
7576
* @param $parm
7677
*/
77-
public function search_core($spiderCore, $spider_name, $parm)
{
    // Ask how many result pages to walk (defaults to 1).
    $page_count = $spiderCore->user_input("请输入爬取页数(1页=50个作品)(默认为:1):", 1);
    $endpoint = "https://www.artstation.com/search/projects.json?page=";

    $page = 1;
    while ($page <= $page_count) {
        // Fetch and decode one page of search results.
        $response = $spiderCore->curl_get($endpoint . $page . $parm, $this->userAgent);
        $decoded = json_decode($response);

        // Extract the image links and download them.
        $images = $this->get_img_url($decoded, $spiderCore);
        $spiderCore->quick_down_img($this->spider_name . "-" . $spider_name, $images, "Artstation");

        // Take a break between pages.
        $spiderCore->spider_wait(ARTSTATION_SLEEP, ARTSTATION_SLEEP_TIME_MIN, ARTSTATION_SLEEP_TIME_MAX);

        $page++;
    }
}
9091

91-
/**
92-
* config设置开启时,每执行一次循环休息一下
93-
*/
94-
public function artatstion_sleep(){
95-
if (ARTSTATION_SLEEP){
96-
print_r(PHP_EOL."爬累了,我要睡觉觉zzzzzzzzzzzzzzz".PHP_EOL);
97-
sleep(ARTSTATION_SLEEP_TIME);
98-
}
99-
}
100-
10192
/**
10293
* 指定用户爬取
10394
* @param $spiderCore
@@ -109,35 +100,35 @@ public function user($spiderCore)
109100
$result = json_decode($result);
110101

111102
$images_arr = $this->get_img_url($result, $spiderCore);
112-
$spiderCore->quick_down_img($this->spider_name . "-" . $user, $images_arr);
103+
$spiderCore->quick_down_img($this->spider_name . "-" . $user, $images_arr,"Artstation",$user);
113104
}
114105

115106
/**
 * Crawl the listing sorted by "latest" (newest images).
 *
 * @param $spiderCore Helper object forwarded to index_spider_core().
 */
public function latest($spiderCore)
{
    $sorting = 'latest';
    $this->index_spider_core($spiderCore, $sorting, '&sorting=' . $sorting);
}
119110

120111
/**
 * Crawl the listing sorted by "picks" (best).
 *
 * @param $spiderCore Helper object forwarded to index_spider_core().
 */
public function picks($spiderCore)
{
    $sorting = 'picks';
    $this->index_spider_core($spiderCore, $sorting, '&sorting=' . $sorting);
}
124115

125116
/**
 * Crawl the listing sorted by "trending" (most popular).
 *
 * @param $spiderCore Helper object forwarded to index_spider_core().
 */
public function trending($spiderCore)
{
    $sorting = 'trending';
    $this->index_spider_core($spiderCore, $sorting, '&sorting=' . $sorting);
}
129120

130121
//https://www.artstation.com/search/projects.json?direction=desc&order=likes_count&page=1&q=dva&show_pro_first=true
131122
/**
 * Interactive keyword search.
 *
 * Prompts for a keyword (a random RAND_KEYWORD entry when left empty), for
 * whether Pro users come first, and for the ordering, then builds the query
 * string and hands it to search_core().
 *
 * @param $spiderCore Helper object providing user_input(); forwarded to search_core().
 */
public function search($spiderCore)
{
    // An answer counts as "no" only when the user typed "false"; the prompt
    // advertises "True/False", so the comparison ignores case and padding.
    // An empty answer yields the boolean default (true), which is not a string.
    $declined = function ($answer) {
        return is_string($answer) && strtolower(trim($answer)) === "false";
    };

    $default_keyword = RAND_KEYWORD[mt_rand(0, count(RAND_KEYWORD) - 1)];
    $title = $spiderCore->user_input("请输入要搜索的内容(不填则随缘):", $default_keyword);

    // Encode the keyword: spaces, '&' or non-ASCII text would otherwise
    // corrupt the query string. The raw title is still passed on as the name.
    $parm = "&q=" . urlencode($title);

    $show_pro_first = $spiderCore->user_input("请输入True/False" . PHP_EOL . "Pro用户优先?(默认 true):", true);
    $parm .= $declined($show_pro_first) ? "&show_pro_first=false" : "&show_pro_first=true";

    $order = $spiderCore->user_input("最新还是喜欢?(默认 true 喜欢优先) :", true);
    $parm .= $declined($order) ? "&order=recent" : "&order=likes_count&direction=desc";

    $this->search_core($spiderCore, $title, $parm);
}
142133
}
143134

Src/Spider/Bcy.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ class Bcy
2020
];
2121
}
2222

23-
die("未完成-还在写");
23+
//https://bcy.net/coser/index/ajaxloadtoppost?p=3&type=week&date=

0 commit comments

Comments
 (0)