Skip to content

Commit

Permalink
update proxy source
Browse files (browse the repository at this point in the history)
  • Loading branch information
lcstore committed Feb 4, 2015
1 parent 89661e9 commit eb44823
Show file tree
Hide file tree
Showing 4 changed files with 13,677 additions and 175 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ public void testConfig() throws Exception {
task.put("port", 1080);
task.put("proxyType", 2);

task.put("ip", "177.55.255.3");
task.put("port", 8080);
task.put("proxyType", 2);
task.put("ip", "222.87.129.218");
task.put("port", 83);
task.put("proxyType", 1);
String returnObject = parser.doParse(task);
System.out.println("result:" + returnObject);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,18 @@
<property name="limit" value="3000" />
<property name="nameList">
<list>
<!-- <value>ConfigJdProduct</value>
<!--
<value>ConfigJdProduct</value>
<value>ConfigJdPromotion</value>
<value>ConfigYhdPromotion</value>
<value>ConfigTmallBrandShop</value>
<value>ConfigJdBrandShop</value>
<value>ConfigProxyDetector</value>
<value>ConfigProxyCollector</value>
<value>ConfigYhdProduct</value>
<value>ConfigProxyChecker</value> -->
<value>ConfigProxyDetector</value>
-->
<value>ConfigProxyChecker</value>
</list>
</property>
</bean>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,125 +38,50 @@ public void testFindProxys() throws Exception {
ProxyAddrServiceImpl brandService = new ProxyAddrServiceImpl();
brandService.setProxyAddrDao(proxyAddrDao);
int maxCount = 100;
List<String> list = new ArrayList<String>();
list.add("81");
list.add("82");
list.add("83");
list.add("84");
list.add("86");
list.add("88");
list.add("444");
list.add("629");
list.add("800");
list.add("808");
list.add("843");
list.add("1053");
list.add("1071");
list.add("1080");
list.add("1209");
list.add("2076");
list.add("2214");
list.add("2235");
list.add("3080");
list.add("3127");
list.add("3129");
list.add("3130");
list.add("3333");
list.add("4444");
list.add("5580");
list.add("7808");
list.add("8000");
list.add("8001");
list.add("8003");
list.add("8010");
list.add("8020");
list.add("8080");
list.add("8081");
list.add("8084");
list.add("8085");
list.add("8088");
list.add("8089");
list.add("8090");
list.add("8103");
list.add("8118");
list.add("8123");
list.add("8585");
list.add("8888");
list.add("9000");
list.add("9064");
list.add("9090");
list.add("9091");
list.add("9797");
list.add("9999");
list.add("10800");
list.add("12351");
list.add("13709");
list.add("14001");
list.add("14826");
list.add("15238");
list.add("16550");
list.add("16696");
list.add("17072");
list.add("17183");
list.add("18000");
list.add("18186");
list.add("18350");
list.add("18888");
list.add("18919");
list.add("19305");
list.add("33333");
list.add("33948");
list.add("44338");
list.add("46397");
list.add("53179");
for (String portString : list) {

for (int i = 0; i <= maxCount; i++) {
try {
// String url =
// String.format("http://www.proxy.com.ru/list_%s.html", i);
// String url =
// String.format("http://www.cybersyndrome.net/pla.html");
// String url =
// String.format("http://www.proxylist.ro/free-proxy-list-widget.js?size=20");
// String url =
// String.format("http://checkerproxy.net/all_proxy");
// String url =
// String.format("http://www.xroxy.com/proxylist.php?port=&type=&ssl=&country=&latency=&reliability=&sort=reliability&desc=true&pnum=%d",
// i);
// String url =
// String.format("http://proxy-list.org/english/index.php?p=%d",
// i);
// String url =
// String.format("http://free-proxy.cz/en/proxylist/main/%d",
// i);
// String url = String.format("http://www.blackhatworld.com/blackhat-seo/proxy-lists/354949-200-scrapebox-passed-http-proxies-freshly-verified-w-screenshot-daily-updates-%d.html", i);
// String url = String.format("http://socks5proxies.com/index.php?page=%d&action=freeproxy", i);
// String url =
// String.format("http://www.samair.ru/proxy/proxy-%s.htm", i <
// 10 ? "0" + i : i);
String url =
String.format("http://www.proxylists.net/%s_%s_ext.html", portString, i);
System.err.println("start to parser:" + url);
byte[] byteArray = Jsoup.connect(url).header("Accept-Encoding", "gzip, deflate").timeout(60000).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0").referrer(url).method(Method.GET).execute().bodyAsBytes();
String source = new String(byteArray, "gbk");
List<ProxyAddrDto> pageList = findProxy(source);
if (pageList.size() < 5) {
Document dom = Jsoup.parse(source);
source = dom.text();
pageList = findProxy(source);
System.err.println("start[text] to save count:" + pageList.size());
} else {
System.err.println("start[html] to save count:" + pageList.size());
}
if (pageList.isEmpty()) {
break;
}
brandService.batchSaveProxyAddrs(pageList);
TimeUnit.SECONDS.sleep(5);
} catch (Exception e) {
e.printStackTrace();
for (int i = 0; i <= maxCount; i++) {
try {
// String url = String.format("http://www.proxy.com.ru/list_%s.html", i);
// String url =String.format("http://www.cybersyndrome.net/pla.html");
// String url =String.format("http://www.proxylist.ro/free-proxy-list-widget.js?size=20");
// String url =
// String.format("http://checkerproxy.net/all_proxy");
String url =
String.format("http://www.xroxy.com/proxylist.php?port=&type=&ssl=&country=&latency=&reliability=&sort=reliability&desc=true&pnum=%d",
i);
// String url =
// String.format("http://proxy-list.org/english/index.php?p=%d",
// i);
// String url =
// String.format("http://free-proxy.cz/en/proxylist/main/%d",
// i);
// String url =
// String.format("http://www.blackhatworld.com/blackhat-seo/proxy-lists/354949-200-scrapebox-passed-http-proxies-freshly-verified-w-screenshot-daily-updates-%d.html",
// i);
// String url =
// String.format("http://socks5proxies.com/index.php?page=%d&action=freeproxy",
// i);
// String url = String.format("http://www.samair.ru/proxy/proxy-%s.htm", i < 10 ? "0" + i : i);
// String url = String.format("http://www.proxylists.net/%s_%s_ext.html", portString, i);
System.err.println("start to parser:" + url);
byte[] byteArray = Jsoup.connect(url).header("Accept-Encoding", "gzip, deflate").timeout(60000).userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
.referrer(url).method(Method.GET).execute().bodyAsBytes();
String source = new String(byteArray, "gbk");
List<ProxyAddrDto> pageList = findProxy(source);
if (pageList.size() < 5) {
Document dom = Jsoup.parse(source);
source = dom.text();
pageList = findProxy(source);
System.err.println("start[text] to save count:" + pageList.size());
} else {
System.err.println("start[html] to save count:" + pageList.size());
}
if (pageList.isEmpty()) {
break;
}
brandService.batchSaveProxyAddrs(pageList);
TimeUnit.SECONDS.sleep(5);
} catch (Exception e) {
e.printStackTrace();
}
}
}
Expand All @@ -183,7 +108,7 @@ public void testParserProxys() throws Exception {
}

private List<ProxyAddrDto> findProxy(String source) throws JSONException {
source = doDecode(source);
// source = doDecode(source);
JSONArray proxyArray = doProxyParser(source);
List<ProxyAddrDto> dtoList = convert2Dto(proxyArray);
return dtoList;
Expand Down
Loading

0 comments on commit eb44823

Please sign in to comment.