Skip to content
This repository has been archived by the owner on Apr 10, 2022. It is now read-only.

Commit

Permalink
提高时间提取的准确性;增加过滤
Browse files Browse the repository at this point in the history
  • Loading branch information
xiandanin committed Jan 6, 2020
1 parent b324b6f commit 1863968
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ src/dist
Project_Default.xml
package-lock.json
.idea
scripts/filter-data/.temp
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"pack": "npm run pack:main && npm run pack:renderer",
"pack:main": "cross-env NODE_ENV=production webpack --progress --colors --config .electron-vue/webpack.main.config.js",
"pack:renderer": "cross-env NODE_ENV=production webpack --progress --colors --config .electron-vue/webpack.renderer.config.js",
"postinstall": "npm run lint:fix"
"postinstall": "npm run lint:fix",
"export-filter-db": "node scripts/export-filter-db.js"
},
"build": {
"productName": "magnetW",
Expand Down Expand Up @@ -83,8 +84,10 @@
"moment": "^2.24.0",
"request": "^2.88.0",
"request-promise-native": "^1.0.7",
"sequelize": "^5.21.3",
"socks5-http-client": "^1.0.4",
"socks5-https-client": "^1.2.1",
"sqlite3": "^4.1.1",
"urijs": "^1.19.2",
"vue": "^2.6.10",
"vue-clipboard2": "^0.3.1",
Expand Down
1 change: 0 additions & 1 deletion scripts/build-service.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
/* 编译成node服务 */

const {execSync} = require('child_process')
const path = require('path')
const fs = require('fs-extra')
const Terser = require('terser')
Expand Down
25 changes: 25 additions & 0 deletions scripts/export-filter-db.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// 导出过滤词库
// https://github.com/fighting41love/funNLP/tree/master/data/%E6%95%8F%E6%84%9F%E8%AF%8D%E5%BA%93
const {execSync} = require('child_process')
const path = require('path')
const fs = require('fs-extra')

// Unpack the bundled word-list archive into a fresh scratch directory.
const zip = 'scripts/filter-data/data.zip'
const temp = 'scripts/filter-data/.temp'
// emptyDirSync also creates the directory when it is missing, so no separate mkdir is needed.
fs.emptyDirSync(temp)
execSync(`unzip ${zip} -d ${temp} -x __MACOSX/*> /dev/null 2>&1`, {stdio: 'inherit'})

// Collect every non-empty line of every extracted file as one filter word.
const words = []
for (const name of fs.readdirSync(temp)) {
  const file = path.join(temp, name)
  // The archive may contain folders; reading one as a file would throw EISDIR.
  if (!fs.statSync(file).isFile()) {
    continue
  }
  // Split on LF or CRLF so Windows-edited lists do not leave a trailing '\r' on each word.
  const itemWords = fs.readFileSync(file, 'utf-8')
    .split(/\r?\n/)
    .filter((word) => word !== '')
  // Push one by one: spreading a huge list into push() can exceed the engine's argument limit.
  for (const word of itemWords) {
    words.push(word)
  }
  console.log('添加过滤词数 ', itemWords.length)
}
console.log('加载完成,过滤词数共%d条', words.length)

// Store the newline-joined list base64-encoded; src/main/filter/filter.js decodes it at startup.
fs.writeFileSync('src/main/filter/keywords.txt', Buffer.from(words.join('\n')).toString('base64'))
Binary file added scripts/filter-data/data.zip
Binary file not shown.
21 changes: 21 additions & 0 deletions scripts/icns.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh
# Generates icon.icns and icon.ico inside build/icons from 256x256.png.
# Run it from the directory that contains build/icons; it relies on the
# sips, iconutil and icotool commands being available on PATH.

filepath=256x256.png
iconset=.icns.iconset

# Stop here if the icon directory is missing; otherwise every command below
# would read and write in whatever directory the script was started from.
cd build/icons || exit 1
mkdir -p "$iconset"

# For each base size emit the 1x image and the @2x image (twice the pixels).
for size in 16 32 128 256 512; do
  double=$((size * 2))
  sips -z "$size" "$size" "$filepath" --out "$iconset/icon_${size}x${size}.png"
  sips -z "$double" "$double" "$filepath" --out "$iconset/icon_${size}x${size}@2x.png"
done

iconutil -c icns "$iconset" -o icon.icns
icotool -c "$iconset/icon_256x256.png" -o icon.ico
4 changes: 2 additions & 2 deletions src/main/api.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const Koa = require('koa')
const Router = require('koa-router')
const koaStatic = require('koa-static')
const app = new Koa()
const prefix = '/api'
const router = new Router({prefix})
Expand All @@ -26,9 +25,10 @@ router.get('/load-rule', async (ctx) => {
router.get('/search', async (ctx) => {
if (ctx.query.keyword) {
const current = repo.makeupSearchOption(ctx.query)
const items = await repo.obtainSearchResult(current, ctx.headers)
const {originalCount, items} = await repo.obtainSearchResult(current, ctx.headers)
ctx.success({
current,
originalCount,
items
})

Expand Down
2 changes: 2 additions & 0 deletions src/main/defaultConfig.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ module.exports = function () {
showProxyRule: false,
// 是否显示源站入口
showSourceLink: false,
// 过滤
filterBare: true,
// 使用代理
proxy: false,
proxyType: 'http',
Expand Down
87 changes: 87 additions & 0 deletions src/main/filter/filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// https://github.com/aojiaotage/text-censor

const path = require('path')
const fs = require('fs')
const iconv = require('iconv-lite')

// Root of the keyword trie. Every key is a single character (one UTF-16 code
// unit) leading to a child node; a node carrying `isEnd` terminates a keyword.
// Nodes are prototype-less so a looked-up character can never hit an inherited property.
const map = Object.create(null)

// Set once the keyword file has been loaded, so repeated initialize() calls stay cheap.
let initialized = false

/**
 * Loads the keyword list into the trie.
 *
 * Reads `keywords.txt` next to this file, which holds a base64-encoded,
 * newline-separated word list. Only the first successful call does any work;
 * later calls return immediately.
 *
 * @throws {Error} whatever `fs.readFileSync` throws when the file cannot be read.
 */
function initialize () {
  if (initialized) {
    return
  }
  const encoded = fs.readFileSync(path.resolve(__dirname, './keywords.txt'), 'utf-8')
  const lines = Buffer.from(encoded, 'base64').toString().split('\n')
  lines.forEach((line) => {
    // Drop a trailing '\r' so a CRLF word list still yields matchable keywords.
    addWord(line.replace(/\r$/, ''))
  })
  initialized = true
}

/**
 * Inserts one keyword into the trie. Empty input is ignored.
 *
 * @param {string} word - Keyword to add.
 */
function addWord (word) {
  if (!word) {
    return
  }
  let node = map
  for (let i = 0; i < word.length; i++) {
    const ch = word[i]
    if (!node[ch]) {
      node[ch] = Object.create(null)
    }
    node = node[ch]
  }
  node.isEnd = true
}

/**
 * Reports whether `s` contains any keyword stored in the trie.
 *
 * Every position of `s` is tried as the start of a keyword, and each attempt
 * walks the trie from its root, so overlapping candidates cannot hide a match
 * and a failed attempt cannot leak state into the next one.
 *
 * @param {string} s - Text to check.
 * @param {function(null, string): void} [cb] - Optional callback, invoked with
 *   `(null, s)` only when no keyword was found.
 * @returns {boolean} `true` as soon as a keyword is found, otherwise `false`.
 */
function isFilter (s, cb) {
  for (let i = 0; i < s.length; i++) {
    // '*' is never used as the first character of a match attempt.
    if (s[i] === '*') {
      continue
    }

    let node = map
    for (let j = i; j < s.length; j++) {
      node = node[s[j]]
      if (!node) {
        break
      }
      if (node.isEnd) {
        return true
      }
    }
  }

  if (typeof cb === 'function') {
    cb(null, s)
  }

  return false
}

module.exports = {
initialize, isFilter
}
1 change: 1 addition & 0 deletions src/main/filter/keywords.txt

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions src/main/format-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,12 @@ module.exports = {
} else {
// 如果不是磁力链 就提取 连续字母数字32-40位
let match = /[\da-zA-Z]{32,40}/.exec(url)
return match ? `magnet:?xt=urn:btih:${match[0]}`.toLowerCase() : url
if (match) {
return `magnet:?xt=urn:btih:${match[0]}`.toLowerCase()
}
}
} else {
return null
}
return null
},
/**
* 提取时间
Expand Down Expand Up @@ -116,26 +117,24 @@ module.exports = {
}
}
// 如果是时间间隔
let number = 0
let name = 'days'
if (/yesterday|/.test(dateText)) {
number = 1
name = 'days'
return moment().subtract(1, 'day').valueOf()
} else {
const unit = [
{regx: /year|/, name: 'years'}, {regx: /month|/, name: 'months'},
{regx: /day|/, name: 'days'}, {regx: /hours|/, name: 'hour'},
{regx: /minute|/, name: 'minutes'}, {regx: /second|/, name: 'seconds'}
{regx: 'yesterday|昨天', name: 'days'},
{regx: 'year|年', name: 'years'}, {regx: 'month|月', name: 'months'},
{regx: 'day|天', name: 'days'}, {regx: 'hour|小时', name: 'hour'},
{regx: 'minute|分钟', name: 'minutes'}, {regx: 'second|秒', name: 'seconds'}
]
number = extractNumber(dateText)
for (let i = 0; i < unit.length; i++) {
if (unit[i].regx.test(dateText)) {
name = unit[i].name
break
const dateTextMatches = new RegExp(`\\d+( {0,3})${unit[i].regx}`, 'gi').exec(dateText)
if (dateTextMatches) {
const number = extractNumber(dateTextMatches[0])
return moment().subtract(number, unit[i].name).valueOf()
}
}
}
return moment().subtract(number, name).valueOf()
return 0
}
},
/**
Expand Down
12 changes: 11 additions & 1 deletion src/main/repository.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const fs = require('fs')
const request = require('request-promise-native')
// const fs = require('fs')
const cacheManager = require('./cache')
const {initialize, isFilter} = require('./filter/filter')
const xpath = require('xpath')
const DOMParser = require('xmldom').DOMParser
const htmlparser2 = require('htmlparser2')
Expand All @@ -26,6 +27,8 @@ let config = null

function applyConfig (newConfig) {
config = newConfig

initialize()
}

function clearCache () {
Expand Down Expand Up @@ -128,7 +131,14 @@ async function obtainSearchResult ({id, url}, headers) {
cacheManager.set(url, items, config.cacheExpired)
}
}
return items

// 过滤
const originalCount = items.length
if (config.filterBare) {
items = items.filter((item) => !isFilter(item.name.replace(/ /g, '')))
}

return {originalCount, items}
}

/**
Expand Down

0 comments on commit 1863968

Please sign in to comment.