Skip to content

Commit

Permalink
1. fix some front page issues
Browse files Browse the repository at this point in the history
2. add bustag.js
3. fix model creation issue
  • Loading branch information
gxtrobot committed Sep 29, 2019
1 parent 41d5ed1 commit 6e8dcb1
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 27 deletions.
8 changes: 6 additions & 2 deletions bustag/app/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import os
import bottle
from multiprocessing import freeze_support
from bustag.util import logger, get_cwd, get_now_time
from bottle import route, run, template, static_file, request, response, redirect
from bustag.spider.db import get_items, get_local_items, RATE_TYPE, RATE_VALUE, ItemRate, Item, LocalItem
from bustag.spider import db
from bustag.util import logger, get_cwd, get_now_time
from bustag.app.schedule import start_scheduler, add_download_job
from bustag.spider import bus_spider
from bustag.app.local import add_local_fanhao
Expand Down Expand Up @@ -56,6 +56,7 @@ def tagit():
@route('/tag/<id:int>', method='POST')
def tag(id):
if request.POST.submit:
formid = request.POST.formid
item_rate = ItemRate.get_by_itemid(id)
rate_value = request.POST.submit
if not item_rate:
Expand All @@ -70,6 +71,8 @@ def tag(id):
page = int(request.query.get('page', 1))
like = request.query.get('like')
url = f'/tagit?page={page}&like={like}'
if formid:
url += f'#{formid}'
print(url)
redirect(url)

Expand Down Expand Up @@ -111,8 +114,9 @@ def do_training():
try:
_, model_scores = clf.train()
except ValueError as ex:
logger.exception(ex)
error_msg = ' '.join(ex.args)
return template('other', path=request.path, model_scores=model_scores, error_msg=error_msg)
return template('model', path=request.path, model_scores=model_scores, error_msg=error_msg)


@route('/local_fanhao', method=['GET', 'POST'])
Expand Down
6 changes: 6 additions & 0 deletions bustag/app/static/js/bustag.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Once the DOM is ready, make every element with class "coverimg" clickable:
// a click copies that element's `src` into the #imglarge image and then
// shows the #imagemodal dialog.
$(function () {
    function showLargeCover() {
        var coverSrc = $(this).attr('src');
        $('#imglarge').attr('src', coverSrc);
        $('#imagemodal').modal('show');
    }

    $('.coverimg').on('click', showLargeCover);
});
14 changes: 14 additions & 0 deletions bustag/app/views/base.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,26 @@
</p>
</div>
</div>

<!-- The Modal -->
<div class="modal fade" id="imagemodal" tabindex="-1" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true">
<div class="modal-dialog modal-lg">
<div class="modal-content">
<div class="modal-body">
<button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button>
<img id="imglarge" src="" class="imagepreview" style="width: 100%;" >
</div>
</div>
</div>
</div>

</footer>
<!-- Optional JavaScript -->
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
<script type="text/javascript" src="/static/js/jquery.min.js"></script>
<script type="text/javascript" src="/static/js/popper.min.js"></script>
<script type="text/javascript" src="/static/js/bootstrap.min.js"></script>
<script type="text/javascript" src="/static/js/bustag.js"></script>
</body>
</html>

2 changes: 1 addition & 1 deletion bustag/app/views/index.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<form action="/correct/{{item.id}}?page={{curr_page}}&like={{like}}" method="post">
<div class="row py-3">
<div class="col-12 col-md-4">
<img class="img-fluid img-thumbnail" src={{item.cover_img_url}}>
<img class="img-fluid img-thumbnail coverimg" src={{item.cover_img_url}}>
</div>

<div class="col-7 col-md-5">
Expand Down
13 changes: 8 additions & 5 deletions bustag/app/views/tagit.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@
</ul>
</div>
</div>
%#generate list of rows of items
%#generate list of rows of items
% i = 1
%for item in items:
<form action="/tag/{{item.id}}?page={{curr_page}}&like={{like}}" method="post">
<form id="form-{{i}}" action="/tag/{{item.id}}?page={{curr_page}}&like={{like}}" method="post">
<div class="row py-3">
<div class="col-12 col-md-4">
<img class="img-fluid img-thumbnail" src={{item.cover_img_url}}>
<img class="img-fluid img-thumbnail coverimg" src={{item.cover_img_url}}>
</div>

<div class="col-7 col-md-5">
<div class="small text-muted">id: {{item.id}}</div>
<div class="small text-muted">发行日期: {{item.release_date}}</div>
Expand All @@ -40,14 +41,16 @@
<span class="badge badge-warning">{{t}}</span>
% end
</div>

</div>
<div class="col-5 col-md-3 align-self-center">
<input type=hidden name="formid" value="form-{{i}}">
<button type="submit" name="submit" class="btn btn-primary btn-sm" value="1">喜欢</button>
<button type="submit" name="submit" class="btn btn-danger btn-sm" value="0">不喜欢</button>
</div>
</div>
</form>
% i = i + 1
%end
% include('pagination.tpl', page_info=page_info)

Expand Down
8 changes: 6 additions & 2 deletions bustag/model/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,17 @@ def load_data():


def as_dict(item):
tags_set = set()
for tags in item.tags_dict.values():
for tag in tags:
tags_set.add(tag)
d = {
'id': item.id,
'id': item.fanhao,
'title': item.title,
'fanhao': item.fanhao,
'url': item.url,
'add_date': item.add_date,
'tags': item.tags,
'tags': tags_set,
'cover_img_url': item.cover_img_url,
'target': item.rate_value
}
Expand Down
25 changes: 17 additions & 8 deletions bustag/spider/bus_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import signal
from aspider.routeing import get_router
from .parser import parse_item
from .db import save
from bustag.util import APP_CONFIG, get_full_url
from .db import save, Item
from bustag.util import APP_CONFIG, get_full_url, logger
router = get_router()
counter = 0
SYSTEM_EXIT = False
Expand All @@ -33,7 +33,7 @@ def get_url_by_fanhao(fanhao):


def verify_page_path(path, no):
print(f'verify page {path} , args {no}')
logger.debug(f'verify page {path} , args {no}')
no = int(no)
if no <= 20:
return True
Expand All @@ -52,18 +52,27 @@ def process_page(text, path, no):
'''
process list page
'''
print(f'page {no} has length {len(text)}')
logger.debug(f'page {no} has length {len(text)}')
print(f'process page {no}')


@router.route('/<fanhao:[\w]+-[\d]+>')
def process_item(text, path, fanhao):
def verify_fanhao(path, fanhao):
'''
process item page
verify fanhao before add it to queue
'''
global counter
counter += 1
print(f'process item {fanhao}')
exists = Item.get_by_fanhao(fanhao)
logger.debug(f'verify {fanhao}: , exists:{exists is None}, skip {path}')
return exists is None


@router.route('/<fanhao:[\w]+-[\d]+>', verify_fanhao)
def process_item(text, path, fanhao):
'''
process item page
'''
logger.debug(f'process item {fanhao}')
url = path
meta, tags = parse_item(text)
meta.update(url=url)
Expand Down
8 changes: 4 additions & 4 deletions bustag/spider/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def get_items(rate_type=None, rate_value=None, page=1, page_size=10):
'''
get required items based on some conditions
'''
items = []
items_list = []
clauses = []
if rate_type is not None:
clauses.append(ItemRate.rate_type == rate_type)
Expand All @@ -275,17 +275,17 @@ def get_items(rate_type=None, rate_value=None, page=1, page_size=10):
if not page is None:
q = q.paginate(page, page_size)
items = get_tags_for_items(q)
for item in q:
for item in items:
Item.loadit(item)
if hasattr(item, 'item_rate'):
item.rate_value = item.item_rate.rate_value
else:
item.rate_value = None
items.append(item)
items_list.append(item)

total_pages = (total_items + page_size - 1) // page_size
page_info = (total_items, total_pages, page, page_size)
return items, page_info
return items_list, page_info


def get_local_items(page=1, page_size=10):
Expand Down
34 changes: 29 additions & 5 deletions bustag/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from urllib.parse import urljoin

logger = logging.getLogger('bustag')

TESTING = False
DATA_PATH = 'data/'
CONFIG_FILE = 'config.ini'
MODEL_PATH = 'model/'
Expand All @@ -21,12 +21,24 @@ def get_cwd():
return os.getcwd()


def check_testing():
    '''
    Switch the module-level TESTING flag on when the TESTING environment
    variable is set to a non-empty value, and print a notice.

    When the variable is unset or empty the flag is left untouched.
    '''
    global TESTING
    if not os.environ.get('TESTING'):
        return
    TESTING = True
    print('*** in test mode ***')


def setup_logging():
logger.addHandler(logging.StreamHandler())
formatter = logging.Formatter(
'%(asctime)s - %(name)-20s - %(levelname)-8s \n- %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)
# pw_logger = logging.getLogger('peewee')
# pw_logger.addHandler(logging.StreamHandler())
# pw_logger.setLevel(logging.DEBUG)
if TESTING:
pw_logger = logging.getLogger('peewee')
pw_logger.addHandler(logging.StreamHandler())
pw_logger.setLevel(logging.DEBUG)


def get_data_path(file):
Expand All @@ -45,7 +57,18 @@ def get_full_url(path):
return full_url


def check_config():
    '''
    Make sure the configuration file exists before it is loaded.

    The path comes from get_data_path(CONFIG_FILE) and is made absolute so
    the log message is unambiguous. If nothing exists at that path, the
    problem is logged (in English and in Chinese) and the process exits
    with status 1.
    '''
    abs_path = os.path.abspath(get_data_path(CONFIG_FILE))
    if os.path.exists(abs_path):
        return
    logger.error(
        f'file {abs_path} not exists, please make sure file exists and configed, system quit now!')
    logger.error(f'文件 {abs_path} 不存在, 请检查文件存在并已配置, 系统退出!')
    sys.exit(1)


def load_config():
check_config()
config_path = get_data_path(CONFIG_FILE)
conf = configparser.ConfigParser()
conf.read(config_path)
Expand Down Expand Up @@ -78,6 +101,7 @@ def check_model_folder():

def init():
print(f'CWD: {get_cwd()}')
check_testing()
setup_logging()
load_config()
check_model_folder()
Expand Down
12 changes: 12 additions & 0 deletions tests/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ def test_get_items():
f'total_items: {page_info[0]}, total_page: {page_info[1]}, current_page: {page_info[2]}, page_size:{page_info[3]}')


def test_get_items2():
    '''
    get_items called with no rate filter and page=None must return at
    least one item.
    '''
    items, page_info = get_items(rate_type=None, rate_value=None, page=None)
    assert len(items) > 0
    print(f'item count:{len(items)}')
    total_items, total_pages, current_page, page_size = page_info
    print(
        f'total_items: {total_items}, total_page: {total_pages}, current_page: {current_page}, page_size:{page_size}')


def test_getit():
id = 100
item = Item.getit(id)
Expand Down
29 changes: 29 additions & 0 deletions tests/test_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import random
from bustag.model import classifier as clf
from bustag.model.prepare import prepare_predict_data
from bustag.spider.db import Item, get_items, ItemRate


def test_train_model():
Expand All @@ -10,3 +12,30 @@ def test_recommend():
total, count = clf.recommend()
print('total:', total)
print('recommended:', count)


def test_make_model():
    '''
    Tag items at random to generate a model.

    Collects the items returned by get_items for pages 1 through 49,
    assigns each one to "like" with probability 0.4 (otherwise "unlike"),
    stores those ratings through ItemRate.saveit and finally trains the
    classifier.
    '''
    page = 50
    like_ratio = 0.4

    candidates = []
    # range(1, page) stops before `page`, so pages 1..49 are fetched.
    for page_no in range(1, page):
        items, _ = get_items(None, None, page_no)
        candidates.extend(items)

    like_items = []
    unlike_items = []
    # One random draw per item, in fetch order.
    for item in candidates:
        bucket = like_items if random.random() < like_ratio else unlike_items
        bucket.append(item)
    print(f'like items: {len(like_items)}, unlike items: {len(unlike_items)}')

    # NOTE(review): the arguments look like (rate_type, rate_value, fanhao)
    # - confirm against ItemRate.saveit.
    for item in like_items:
        ItemRate.saveit(1, 1, item.fanhao)
    for item in unlike_items:
        ItemRate.saveit(1, 0, item.fanhao)

    clf.train()
6 changes: 6 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ def test_to_localtime():
t = datetime.utcnow()
local = util.to_localtime(t)
print(local)


def test_testing_mode():
    '''
    util.TESTING must be exactly True when the tests run.

    The TESTING environment variable is printed first so a failure is
    easy to diagnose.
    '''
    import os
    print(f'env: {os.getenv("TESTING")}')
    # Identity check instead of `== True`: the flag must be the boolean
    # True, not merely something that compares equal to it.
    assert util.TESTING is True

0 comments on commit 6e8dcb1

Please sign in to comment.