Skip to content

Commit

Permalink
Add:log4j
Browse files Browse the repository at this point in the history
  • Loading branch information
gaorui authored and gaorui committed Nov 13, 2017
1 parent b1bf412 commit c7f3ddc
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 31 deletions.
15 changes: 15 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,20 @@
<version>4.11</version>
</dependency>

<!--
  Logging dependencies introduced together with the switch from
  System.out.println / printStackTrace to logger calls.
  NOTE(review): the Java classes shown in this change import
  org.apache.log4j.Logger directly rather than the SLF4J API; confirm
  whether the two SLF4J artifacts are required by other code or by a
  third-party library before keeping them.
-->
<!-- SLF4J API artifact (logging facade). -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.6</version>
</dependency>
<!-- SLF4J binding for log4j 1.2; declared at the same version as slf4j-api above. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.6</version>
</dependency>
<!-- log4j 1.x, which provides the org.apache.log4j.Logger class imported by the application classes. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
</dependency>
</dependencies>
</project>
7 changes: 5 additions & 2 deletions src/main/java/com/myapp/client/Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import com.myapp.crawler4j.Controller;
import com.myapp.proxy.ProxyPool;
import org.apache.log4j.Logger;
import org.quartz.*;


Expand All @@ -12,6 +13,8 @@
@DisallowConcurrentExecution
public class Client implements StatefulJob {

private static Logger logger = Logger.getLogger(Client.class);

private static int count = 0;

public static ProxyPool proxyPool = new ProxyPool();
Expand All @@ -22,8 +25,8 @@ public void execute(JobExecutionContext jobExecutionContext) throws JobExecution
synchronized (Client.class) {
count++;
}
System.out.println("#####第" + count + "次开始爬取#####");
logger.info("#####第" + count + "次开始爬取#####");
Controller.fetchProxyIp();
System.out.println("#####爬取完毕#####");
logger.info("#####爬取完毕#####");
}
}
6 changes: 5 additions & 1 deletion src/main/java/com/myapp/crawler4j/Controller.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package com.myapp.crawler4j;

import com.myapp.client.Client;
import com.myapp.entity.Crawl;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import org.apache.log4j.Logger;

import java.util.ArrayList;
import java.util.List;
Expand All @@ -15,6 +17,8 @@
* Created by gaorui on 17/4/20.
*/
public class Controller {
private static Logger logger = Logger.getLogger(Controller.class);

public static void fetchProxyIp() {
String crawlStorageFolder = "/Users/gaorui/";
int numberOfCrawlers = 1;
Expand Down Expand Up @@ -46,7 +50,7 @@ public static void fetchProxyIp() {
for (Crawl c : crawlList) {
c.getCrawlController().start(MyCraler.class, numberOfCrawlers);
c.getCrawlController().waitUntilFinish();
System.out.println("Crawler "+c.getCrawlName()+" is finished.");
logger.info("Crawler "+c.getCrawlName()+" is finished.");
}
}
}
4 changes: 0 additions & 4 deletions src/main/java/com/myapp/crawler4j/MyCraler.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@ public boolean shouldVisit(Page referringPage, WebURL url) {
@Override
public void visit(Page page) {
String url = page.getWebURL().getURL();
System.out.println("URL: " + url);

if (page.getParseData() instanceof HtmlParseData) {
System.out.println("###########################");
HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
String text = htmlParseData.getText();
String html = htmlParseData.getHtml();
Expand All @@ -69,7 +67,6 @@ public void visit(Page page) {
String ip = tds.get(0).text();
int port = Integer.parseInt(tds.get(1).text());
String area = tds.get(5).text();
System.out.println("$:" + ip + ":" + port);
Client.proxyPool.add(ip, port);
redisOnMessageUtil.Push(area, ip, port);
}
Expand All @@ -81,7 +78,6 @@ public void visit(Page page) {
String ip = tds.get(0).text();
int port = Integer.parseInt(tds.get(1).text());
String area = tds.get(5).text();
System.out.println("$:" + ip + ":" + port);
Client.proxyPool.add(ip, port);

redisOnMessageUtil.Push(area, ip, port);
Expand Down
15 changes: 9 additions & 6 deletions src/main/java/com/myapp/main/main.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
import com.myapp.proxy.ProxyPool;
import com.myapp.util.HttpStatus;
import com.myapp.util.ProxyIpCheck;
import org.apache.log4j.Logger;
import org.quartz.*;
import java.util.concurrent.CountDownLatch;

/**
* Created by gaorui on 16/12/28.
*/
public class main implements StatefulJob {
private static Logger logger = Logger.getLogger(main.class);

ProxyPool proxyPool = null;
private static int count = 0;
private Integer countLock = 0;
Expand All @@ -22,9 +25,9 @@ public void execute(JobExecutionContext jobExecutionContext) throws JobExecution
count++;
}
proxyPool = Client.proxyPool;
System.out.println("#####爬虫ip池第" + count + "次开始测试#####");
logger.info("#####爬虫ip池第" + count + "次开始测试#####");
int idleNum = proxyPool.getIdleNum();
System.out.println("###idleNum:" + idleNum + "###");
logger.info("###idleNum:" + idleNum + "###");
int size = idleNum / 12;
int z = 0;
if (size != 0) {
Expand All @@ -44,10 +47,10 @@ public void execute(JobExecutionContext jobExecutionContext) throws JobExecution
countDownLatch.await();
Thread.sleep(200);
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
}

System.out.println("#####爬虫ip池第" + count + "次测试ing#####");
logger.info("#####爬虫ip池第" + count + "次测试ing#####");
proxyPool.allProxyStatus(); // 可以获取 ProxyPool 中所有 Proxy 的当前状态
}

Expand All @@ -65,7 +68,7 @@ public A(int j, int z,CountDownLatch latch) {

@Override
public void run() {
System.out.println("#####多线程分片跑区间:" + (j * z + 1) + "-" + ((j + 1) * z));
logger.info("#####多线程分片跑区间:" + (j * z + 1) + "-" + ((j + 1) * z));
for (int i = j * z + 1; i < (j + 1) * z; i++) {
HttpProxy httpProxy = proxyPool.borrow();
HttpStatus code = ProxyIpCheck.Check(httpProxy.getProxy());
Expand All @@ -74,7 +77,7 @@ public void run() {
proxyPool.reback(httpProxy, code); // 使用完成之后,归还 Proxy,并将请求结果的 http 状态码一起传入
}
latch.countDown();
System.out.println("当前线程" + Thread.currentThread().getName() + "执行完毕:");
logger.info("当前线程" + Thread.currentThread().getName() + "执行完毕:");
}
}
}
8 changes: 5 additions & 3 deletions src/main/java/com/myapp/scanner/ScanningPool.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.myapp.proxy.HttpProxy;
import com.myapp.util.CrawerBase;
import com.uwyn.jhighlight.tools.StringUtils;
import org.apache.log4j.Logger;
import org.apache.poi.util.StringUtil;

import java.io.InputStream;
Expand All @@ -18,14 +19,15 @@
public class ScanningPool {

public static boolean b = true;
private static Logger logger = Logger.getLogger(ScanningPool.class);

public static void scanningProxyIp(HttpProxy httpProxy) {
b = false;
Proxy proxy = httpProxy.getProxy();
String str = proxy.toString();
String filterIp = str.substring(str.indexOf("/") + 1, str.indexOf(":"));
str = str.substring(str.indexOf("/") + 1, str.indexOf(".", str.indexOf(".") + 1));
System.out.println("*扫描ip段ing:" + str);
logger.info("*扫描ip段ing:" + str);
int a[] = {80, 8080, 3128, 8081, 9080};
String ip;
for (int i = 0; i < 255; i++) {
Expand All @@ -48,7 +50,7 @@ public static void createIPAddress(String ip, int port) {
try {
url = new URL("http://www.baidu.com/");
} catch (MalformedURLException e) {
System.out.println("url invalidate");
logger.error("url invalidate");
return;
}
InetSocketAddress addr = null;
Expand All @@ -65,7 +67,7 @@ public static void createIPAddress(String ip, int port) {

if (code == 200) {
Client.proxyPool.add(ip, port);
System.out.println(addr.toString() + "is ok");
logger.info(addr.toString() + "is ok");
}

} catch (Exception e) {
Expand Down
24 changes: 14 additions & 10 deletions src/main/java/com/myapp/timer/QuartzManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@
* Created by gaorui on 17/1/9.
**/

import com.myapp.crawler4j.Controller;
import org.apache.log4j.Logger;
import org.quartz.*;
import org.quartz.impl.StdSchedulerFactory;

import java.util.Date;


public class QuartzManager {
private static Logger logger = Logger.getLogger(QuartzManager.class);

private static SchedulerFactory gSchedulerFactory = new StdSchedulerFactory();
private static String JOB_GROUP_NAME = "EXTJWEB_JOBGROUP_NAME";
private static String TRIGGER_GROUP_NAME = "EXTJWEB_TRIGGERGROUP_NAME";
Expand Down Expand Up @@ -42,7 +46,7 @@ public static void addJob(String jobName, Class cls, String time) {
}

} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -65,7 +69,7 @@ public static void addJobT(String jobName, Class cls) {
}

} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -91,7 +95,7 @@ public static void addJob(String jobName, String jobGroupName,
trigger.setCronExpression(time);// 触发器时间设定
sched.scheduleJob(jobDetail, trigger);
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -117,7 +121,7 @@ public static void modifyJobTime(String jobName, String time) {
addJob(jobName, objJobClass, time);
}
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand Down Expand Up @@ -145,7 +149,7 @@ public static void modifyJobTime(String triggerName,
sched.resumeTrigger(triggerName, triggerGroupName);
}
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -161,7 +165,7 @@ public static void removeJob(String jobName) {
sched.unscheduleJob(jobName, TRIGGER_GROUP_NAME);// 移除触发器
sched.deleteJob(jobName, JOB_GROUP_NAME);// 删除任务
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -175,7 +179,7 @@ public static void startJobs() {
Scheduler sched = gSchedulerFactory.getScheduler();
sched.start();
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -190,7 +194,7 @@ public static void shutdownJobs() {
sched.shutdown();
}
} catch (Exception e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
}
Expand All @@ -211,7 +215,7 @@ public static void pauseTrigger(String jobName) {


} catch (SchedulerException e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}

Expand All @@ -227,7 +231,7 @@ public static void rescheduleJob(String jobName) {
// sched.rescheduleJob(jobName,TRIGGER_GROUP_NAME,trigger);
sched.rescheduleJob(jobName, TRIGGER_GROUP_NAME, trigger);
} catch (SchedulerException e) {
e.printStackTrace();
logger.error(e.getMessage());
return;
}
return;
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/com/myapp/util/CrawerBase.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.myapp.util;

import com.myapp.client.Client;
import org.apache.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
Expand All @@ -11,6 +13,8 @@
* Created by gaorui on 16/12/26.
*/
public class CrawerBase {
private static Logger logger = Logger.getLogger(CrawerBase.class);

public static final String[] ua = {"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)",
Expand All @@ -32,7 +36,7 @@ public static Document get(String url, int trys) throws IOException {

return connection.get();
} catch (IOException e) {
System.out.println("try connect the page:" + url + ",try times:" + trys);
logger.error("try connect the page:" + url + ",try times:" + trys);
if (trys-- != 0) {
return get(url, trys);
}
Expand Down Expand Up @@ -62,7 +66,7 @@ public static Document proxyGet(String url, int trys, String ip, int port) throw
// connection(ip, port);
return connection.get();
} catch (IOException e) {
System.out.println("try connect the page:" + url + ",try times:" + trys);
logger.error("try connect the page:" + url + ",try times:" + trys);
if (trys-- != 0) {
return get(url, trys);
}
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/com/myapp/util/HttpUtil.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.myapp.util;

import org.apache.log4j.Logger;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
Expand All @@ -11,6 +13,7 @@
* Created by gaorui on 17/4/15.
*/
public class HttpUtil {
private static Logger logger = Logger.getLogger(HttpUtil.class);

public static String sendGet(String url, String param) {
String result = "";
Expand Down Expand Up @@ -41,7 +44,7 @@ public static String sendGet(String url, String param) {
result += line;
}
} catch (Exception e) {
System.out.println("发送GET请求出现异常!" + e);
logger.error("发送GET请求出现异常!" + e);
e.printStackTrace();
return result;
}
Expand Down
Loading

0 comments on commit c7f3ddc

Please sign in to comment.