Skip to content

Commit

Permalink
包整理,代码优化
Browse files Browse the repository at this point in the history
  • Loading branch information
changwenbo committed Oct 18, 2019
1 parent cfc1959 commit ef663e7
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 135 deletions.
1 change: 0 additions & 1 deletion src/main/java/com/tianya/HouseApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,5 @@ public class HouseApplication {
/**
 * Application entry point: starts the Spring application.
 *
 * @param args command-line arguments, passed through to Spring unchanged
 */
public static void main(String[] args) {
    // The leftover empty System.out.println() after startup was removed: it only
    // emitted a blank line on stdout.
    SpringApplication.run(HouseApplication.class, args);
}
}
79 changes: 16 additions & 63 deletions src/main/java/com/tianya/controller/HouseController.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
package com.tianya.controller;

import com.tianya.util.HttpMethod;
import com.tianya.service.HouseService;
import com.tianya.util.FileUtils;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* @author Chang
Expand All @@ -22,64 +17,22 @@
@Slf4j
public class HouseController {

public static final String PREFIX = "https://bbs.tianya.cn/m/post_author-house-252774-";
public static final String SUFFIX = ".shtml";

@Autowired
private HttpMethod httpMethod;

/**
 * Fetches the pages of the thread one by one and collects the comments found on them.
 *
 * <p>A page whose request yields no content is skipped instead of aborting the
 * whole run.
 *
 * @return the extracted comments, in the order the pages were requested
 */
public List<String> getComment() {
    List<String> res = new ArrayList<>();
    // 92 is fixed: the total number of posts.
    log.info("开始请求天涯帖子.....");
    for (int i = 1; i <= 92; i++) {
        String url = getUrl(i);
        String content = httpMethod.get(url);
        if (content == null) {
            // HttpMethod.get starts from a null result and returns it as-is, so a
            // failed request can surface here as null; parsing null would throw.
            log.warn("Skipping page {}: no content returned for {}", i, url);
            continue;
        }
        getParse(content, res);
        log.info("请求帖子第 " + i + " 行");
    }
    log.info("结束请求天涯帖子.....");
    return res;
}
private HouseService houseService;

/**
* 得到url地址
* @param i
* @return
*/
private String getUrl(int i) {
int pos = 5 * i - 4;
String res = PREFIX + pos + SUFFIX;
return res;
/**
 * Handler mapped to the root path.
 *
 * @return the fixed string {@code "Hello World"}
 */
@RequestMapping("/")
public String index() {
    final String greeting = "Hello World";
    return greeting;
}

/**
* 进行数据清理
* @param content
* @param res
*/
private void getParse(String content, List<String> res) {

Document doc = Jsoup.parse(content);
Elements links = doc.getElementsByClass("bd");
for (Element link : links) {
String str = link.toString();
Pattern pattern = Pattern.compile("<p>[.\\s\\S]+?div");
Matcher m = pattern.matcher(str);
while (m.find()) {
String s = m.group();
s = s.replaceAll("<", "");
s = s.replaceAll(">", "");
s = s.replaceAll("/", "");
s = s.replaceAll("p", "");
s = s.replaceAll("div", "");
s = s.replaceAll("\n", "");
res.add(s);
}
}
/**
 * Handler for {@code /get/comment}: fetches the comments, converts them to
 * Markdown and writes them to a file.
 *
 * @return the fixed string {@code "ok"}
 */
@RequestMapping(value = "/get/comment")
public String getComments() {
    // Steps 1 and 2: fetch the comments, then convert them to Markdown.
    List<String> markdown = houseService.transferStrToMD(houseService.getComment());
    // Step 3: write to file. By default it goes to drive D, file name house.md.
    FileUtils.writeFile(markdown);
    return "ok";
}
}
99 changes: 72 additions & 27 deletions src/main/java/com/tianya/service/HouseService.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
package com.tianya.service;

import com.tianya.controller.HouseController;
import com.tianya.util.HttpMethod;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* @author Chang
Expand All @@ -19,41 +25,80 @@
@Slf4j
public class HouseService {

public static final String PREFIX = "https://bbs.tianya.cn/m/post_author-house-252774-";
public static final String SUFFIX = ".shtml";

@Autowired
private HouseController houseController;

/**
 * Writes the Markdown comments to {@code f:\house.txt}, each one preceded by a
 * numbered Markdown heading.
 *
 * <p>Text is encoded as UTF-8 explicitly; relying on the platform default charset
 * can garble the Chinese text on systems whose default is not UTF-8. Failures are
 * logged and not rethrown (best effort, as before).
 */
public void writeFile() {
    String headingPrefix = "### ==**";
    String headingSuffix = "楼: **==" + "\n";
    List<String> res = getMarkDown();
    try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(new File("f:\\house.txt")))) {
        int cnt = 1;
        for (String s : res) {
            // Markdown heading carrying the running comment number.
            String heading = headingPrefix + cnt + headingSuffix + "\n";
            bos.write(heading.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            // The comment itself.
            bos.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            cnt++;
        }
        System.out.println("success");
    } catch (Exception e) {
        // Log through the class logger, with the stack trace, instead of printStackTrace().
        log.error("写入文件出错:" + e.getMessage(), e);
    }
}

/**
* 转化为markdown格式
* @return
*/
public List<String> getMarkDown() {
List<String> res = houseController.getComment();
/**
 * Converts raw comments into Markdown paragraphs by prefixing each one with two
 * spaces and appending a line break.
 *
 * <p>The input list is left untouched; a new list is returned.
 *
 * @param comment the raw comments; may be {@code null} or empty
 * @return a new list with the formatted comments, or an empty list when there is
 *         nothing to convert
 */
public List<String> transferStrToMD(List<String> comment) {
    if (comment == null || comment.isEmpty()) {
        // Typed empty list instead of the raw Collections.EMPTY_LIST.
        return Collections.emptyList();
    }
    // Build a fresh list so the caller's list is not modified in place.
    List<String> res = new ArrayList<>(comment.size());
    for (String line : comment) {
        res.add(" " + " " + line + "\n");
    }
    return res;
}

/**
 * Fetches the pages of the thread one by one and collects the comments found on them.
 *
 * <p>A page whose request yields no content is skipped instead of aborting the
 * whole run.
 *
 * @return the extracted comments, in the order the pages were requested
 */
public List<String> getComment() {
    List<String> res = new ArrayList<>();
    // 92 is fixed: the total number of posts.
    log.info("开始请求天涯帖子.....");
    for (int i = 1; i <= 92; i++) {
        String url = getUrl(i);
        String content = HttpMethod.get(url);
        if (content == null) {
            // HttpMethod.get starts from a null result and returns it as-is, so a
            // failed request can surface here as null; parsing null would throw.
            log.warn("Skipping page {}: no content returned for {}", i, url);
            continue;
        }
        getParse(content, res);
        log.info("请求帖子第 " + i + " 行");
    }
    log.info("结束请求天涯帖子.....");
    return res;
}

/**
 * Builds the page URL for the given counter.
 *
 * @param i page counter; the number placed in the URL is {@code 5 * i - 4}
 * @return {@code PREFIX}, followed by that number, followed by {@code SUFFIX}
 */
private String getUrl(int i) {
    return PREFIX + (5 * i - 4) + SUFFIX;
}

/**
 * Extracts the comment text from one page of HTML and appends it to {@code res}.
 *
 * <p>Within every element of class {@code bd}, each fragment that starts at
 * {@code <p>} and runs up to the next occurrence of {@code div} becomes one entry.
 * Only markup and line breaks are removed from the fragment; previously every
 * {@code p}, {@code /} and {@code div} was deleted, which also damaged the comment
 * text itself.
 *
 * @param content HTML of one page; {@code null} is ignored
 * @param res     list that receives one entry per extracted fragment
 */
private void getParse(String content, List<String> res) {
    if (content == null) {
        return;
    }
    // Compiled once per call rather than once per element.
    Pattern paragraph = Pattern.compile("<p>[.\\s\\S]+?div");
    Document doc = Jsoup.parse(content);
    for (Element link : doc.getElementsByClass("bd")) {
        Matcher m = paragraph.matcher(link.toString());
        while (m.find()) {
            res.add(stripMarkup(m.group()));
        }
    }
}

/** Removes HTML tags, stray angle brackets and line breaks from a matched fragment. */
private static String stripMarkup(String fragment) {
    // Complete tags such as <p>, </p>, <br>.
    String text = fragment.replaceAll("<[^<>]*>", "");
    // The match ends on "div", i.e. usually on an unterminated "<div" or "</div".
    text = text.replaceAll("</?div$", "");
    // Whatever angle brackets and line breaks are still left.
    return text.replaceAll("[<>\\n]", "");
}
}


Expand Down
38 changes: 38 additions & 0 deletions src/main/java/com/tianya/util/FileUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package com.tianya.util;

import lombok.extern.slf4j.Slf4j;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.List;

/**
* @author changwenbo
* @date 2019/10/18 16:58
*/
@Slf4j
public class FileUtils {

private static final String PREFIX = "### ==**";
private static final String SUFFIX = "楼: **==" + "\n";

/** 写入文件中,转化为PDF文档 */
public static void writeFile(List<String> res) {
try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(new File("d:\\house.md")))) {
int cnt = 1;
for (String s : res) {
// markdown格式
String ss = PREFIX + cnt + SUFFIX + "\n";
bos.write(ss.getBytes());
// 真正数据
bos.write(s.getBytes());
cnt++;
}
System.out.println("success");
} catch (Exception e) {
log.error("写入文件出错:" + e.getMessage());
e.printStackTrace();
}
}
}
6 changes: 2 additions & 4 deletions src/main/java/com/tianya/util/HttpMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
import com.arronlong.httpclientutil.common.HttpHeader;
import com.arronlong.httpclientutil.exception.HttpProcessException;
import org.apache.http.Header;
import org.springframework.stereotype.Component;

/**
* @author Chang
* @Date: 2018/10/3
*/
@Component
public class HttpMethod {

public String get(String url) {
public static String get(String url) {
HttpConfig config = getConfig();
String content = null;
try {
Expand All @@ -25,7 +23,7 @@ public String get(String url) {
return content;
}

private HttpConfig getConfig() {
private static HttpConfig getConfig() {
String cookies = "__guid=414814951; __guid2=414814951; user=w=wenber888&id=137776589&f=1; right=web4=n&portal=n; __u_a=v2.2.6; sso=r=1648448059&sid=&wsid=B45353929B80073F357A507D1C991623; temp=k=447189638&s=&t=1541148185&b=4192447767437b928489e63e3913ef52&ct=1541148185&et=1543740185; temp4=rm=16a6cb6e3414a9655ac76f341c44e1e4; u_tip=137776589=0; vip=447189638%3D0; JSESSIONID=abc_q2y-0bQSr8Zw1LtBw; time=ct=1541148308.213; __ptime=1541148308504; ty_msg=1541148523985_137776589_2_0_0_0_0_0_2_0_0_0_0";
String referer = "https://www.baidu.com";
Header[] headers= HttpHeader.custom().cookie(cookies).referer(referer).build();
Expand Down
40 changes: 0 additions & 40 deletions src/main/java/com/tianya/web/HouseWeb.java

This file was deleted.

0 comments on commit ef663e7

Please sign in to comment.