Skip to content

Commit a24b78e

Browse files
modify README.md
1 parent f4c69f6 commit a24b78e

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

README.md

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,9 @@ WebCollector is an open source web crawler framework based on Java.It provides
33
some simple interfaces for crawling the Web, so you can set up a
44
multi-threaded web crawler in less than 5 minutes.
55

6-
7-
8-
96
## HomePage
107
[https://github.com/CrawlScript/WebCollector](https://github.com/CrawlScript/WebCollector)
118

12-
<!--
13-
## Document
14-
[WebCollector-GitDoc](https://github.com/CrawlScript/WebCollector-GitDoc)
15-
-->
16-
179

1810
## Installation
1911

@@ -34,13 +26,19 @@ WebCollector jars are available on the [HomePage](https://github.com/CrawlScript
3426

3527

3628

29+
## Example Index
30+
31+
+ [AbuyunDynamicProxyRequester.java](src/main/java/cn/edu/hfut/dmic/webcollector/example/AbuyunDynamicProxyRequester.java)
32+
33+
34+
3735

3836
## Quickstart
3937
Let's crawl some news from GitHub news. This demo prints out the titles and contents extracted from the news articles on GitHub news.
4038

4139
### Automatically Detecting URLs
4240

43-
[AutoNewsCrawler.java](https://github.com/CrawlScript/WebCollector/blob/master/AutoNewsCrawler.java):
41+
[DemoAutoNewsCrawler.java](src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoAutoNewsCrawler.java):
4442

4543
```java
4644

@@ -53,14 +51,14 @@ import cn.edu.hfut.dmic.webcollector.plugin.rocks.BreadthCrawler;
5351
*
5452
* @author hu
5553
*/
56-
public class AutoNewsCrawler extends BreadthCrawler {
54+
public class DemoAutoNewsCrawler extends BreadthCrawler {
5755
/**
5856
* @param crawlPath crawlPath is the path of the directory which maintains
5957
* information of this crawler
6058
* @param autoParse if autoParse is true, BreadthCrawler will automatically extract
6159
* links which match regex rules from pages
6260
*/
63-
public AutoNewsCrawler(String crawlPath, boolean autoParse) {
61+
public DemoAutoNewsCrawler(String crawlPath, boolean autoParse) {
6462
super(crawlPath, autoParse);
6563
/*start pages*/
6664
this.addSeed("https://blog.github.com/");
@@ -106,7 +104,7 @@ public class AutoNewsCrawler extends BreadthCrawler {
106104
}
107105

108106
public static void main(String[] args) throws Exception {
109-
AutoNewsCrawler crawler = new AutoNewsCrawler("crawl", true);
107+
DemoAutoNewsCrawler crawler = new DemoAutoNewsCrawler("crawl", true);
110108
/*start crawl with depth of 4*/
111109
crawler.start(4);
112110
}
@@ -119,7 +117,7 @@ public class AutoNewsCrawler extends BreadthCrawler {
119117
### Manually Detecting URLs
120118

121119

122-
[ManualNewsCrawler.java](https://github.com/CrawlScript/WebCollector/blob/master/ManualNewsCrawler.java):
120+
[DemoManualNewsCrawler.java](src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoManualNewsCrawler.java):
123121

124122
```java
125123

@@ -132,14 +130,14 @@ import cn.edu.hfut.dmic.webcollector.plugin.rocks.BreadthCrawler;
132130
*
133131
* @author hu
134132
*/
135-
public class ManualNewsCrawler extends BreadthCrawler {
133+
public class DemoManualNewsCrawler extends BreadthCrawler {
136134
/**
137135
* @param crawlPath crawlPath is the path of the directory which maintains
138136
* information of this crawler
139137
* @param autoParse if autoParse is true, BreadthCrawler will automatically extract
140138
* links which match regex rules from pages
141139
*/
142-
public ManualNewsCrawler(String crawlPath, boolean autoParse) {
140+
public DemoManualNewsCrawler(String crawlPath, boolean autoParse) {
143141
super(crawlPath, autoParse);
144142
// add 5 start pages and set their type to "list"
145143
// "list" is not a reserved word; you can use any other string instead
@@ -182,7 +180,7 @@ public class ManualNewsCrawler extends BreadthCrawler {
182180
}
183181

184182
public static void main(String[] args) throws Exception {
185-
ManualNewsCrawler crawler = new ManualNewsCrawler("crawl", false);
183+
DemoManualNewsCrawler crawler = new DemoManualNewsCrawler("crawl", false);
186184

187185
crawler.getConf().setExecuteInterval(5000);
188186

src/main/java/cn/edu/hfut/dmic/webcollector/example/DemoManualNewsCrawler.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
package cn.edu.hfut.dmic.webcollector.example;
2+
13
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
24
import cn.edu.hfut.dmic.webcollector.model.Page;
35
import cn.edu.hfut.dmic.webcollector.plugin.rocks.BreadthCrawler;

webcollector-2.73-alpha-bin.zip

-78 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)