-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Ubuntu
committed
Jul 23, 2020
1 parent
d52b7dc
commit 9c62288
Showing
7 changed files
with
460 additions
and
28 deletions.
There are no files selected for viewing
131 changes: 112 additions & 19 deletions
131
TF_SimilarityAbstractSearch/Notebooks/02_Sentence_Similarity.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,62 @@ | ||
import os | ||
from pathlib import Path | ||
from flask import Flask, request, jsonify, url_for, render_template | ||
from flask import Flask, request, jsonify, url_for, render_template, redirect | ||
from similarity_abstract_search import utils | ||
|
||
# Flask application object; the route handlers below register on it.
app = Flask(__name__)

# Directory holding the preprocessed JSON artifacts, resolved relative to
# this file.  An earlier assignment using parents[3] was immediately
# overwritten by this one, so only the effective value is kept.
DATADIR = Path(__file__).resolve().parents[2] / 'Data/processed/'

# sim_dict[i] is iterated by sim() as indices into paper_dict.
sim_dict = utils.load_json(DATADIR / 'sim_vecs.json')
# search_dict is enumerated by search(); each entry is queried with
# .get(term, 0), i.e. a per-paper mapping from term to score.
search_dict = utils.load_json(DATADIR / 'search.json')
# NOTE(review): raw_paper_data is loaded but not referenced by any handler
# visible here -- confirm whether it is still needed.
raw_paper_data = utils.load_json(DATADIR / 'raw_paper_data.json')
# Sequence of paper records; each record is expected to carry an 'id' key.
paper_dict = utils.load_json(DATADIR / 'paper_data.json')

# Map each paper's 'id' to its position in paper_dict.
paperIDtoIDX = {p['id']: idx for idx, p in enumerate(paper_dict)}
|
||
# doi_to_idx= get_doi_to_idx() | ||
@app.route("/search", methods=["GET"])
def search():
    """Render the papers that best match the ``q`` query-string parameter.

    The query is lower-cased and split on whitespace.  Each entry of
    ``search_dict`` contributes the sum of its scores for the query terms;
    entries scoring zero are skipped.  Matching papers are sorted by
    descending score and at most 40 are passed to ``index.html``.

    Returns:
        A redirect to the ``main`` view when ``q`` is missing or empty,
        otherwise the rendered ``index.html`` template.
    """
    q = request.args.get('q', '')
    if not q:
        # Nothing to search for: fall back to the landing page.
        return redirect(url_for('main'))

    # str.split() with no argument already ignores surrounding whitespace.
    terms = q.lower().split()
    n = len(paper_dict)
    scores = []
    for i, sd in enumerate(search_dict):
        score = sum(sd.get(term, 0) for term in terms)
        if score == 0:
            continue
        # Position bonus: entries earlier in the list get a larger boost.
        score += 1.0 * (n - i) / n
        scores.append((score, paper_dict[i]))
    scores.sort(reverse=True, key=lambda x: x[0])
    papers = [paper for score, paper in scores if score > 0][:40]
    context = default_contex(papers, sort_order='search', search_query=q)
    return render_template('index.html', **context)
|
||
|
||
# The ``path`` converter lets the id contain '/' characters.  The front end
# links here with a DOI (which contains a slash) -- presumably the paper
# 'id' is that DOI; verify against the data files.  Ids without a slash
# still match exactly as before.
@app.route('/sim/<path:paper_id>')
def sim(paper_id: str = None):
    """Render the papers listed as similar to ``paper_id``.

    Args:
        paper_id: Identifier looked up in ``paperIDtoIDX``.

    Returns:
        The rendered ``index.html`` template.  The paper list is empty when
        ``paper_id`` is unknown; otherwise it contains the ``paper_dict``
        entries at the indices stored in ``sim_dict`` for that paper.
    """
    pidx = paperIDtoIDX.get(paper_id)
    if pidx is None:
        papers = []
    else:
        papers = [paper_dict[cix] for cix in sim_dict[pidx]]
    context = default_contex(papers, sort_order='sim')
    return render_template('index.html', **context)
def default_contex(papers, **kwargs):
    """Build the template context shared by every page.

    Args:
        papers: Paper records to expose to the template as ``papers``.
        **kwargs: Extra page-level variables merged into ``gvars``; a
            ``num_papers`` keyword overrides the computed default.

    Returns:
        A dict with the keys ``'papers'`` and ``'gvars'``, where ``gvars``
        holds ``num_papers`` (the size of ``paper_dict``) plus ``kwargs``.
    """
    page_vars = {'num_papers': len(paper_dict), **kwargs}
    return {'papers': papers, 'gvars': page_vars}
|
||
@app.route('/')
def main():
    """Render the landing page with the first 40 entries of ``paper_dict``."""
    papers = paper_dict[:40]
    context = default_contex(papers, sort_order='latest')
    return render_template('index.html', **context)


if __name__ == "__main__":
    # main() is a request handler: calling it directly would try to render
    # a template outside an application context.  Start the development
    # server instead when the module is executed as a script.
    app.run(host="0.0.0.0", port=5000, debug=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
'use strict'; | ||
|
||
const Tweet = props => { | ||
const t = props.tweet; | ||
const turl = "https://twitter.com/" + t.name + "/status/" + t.id; | ||
return ( | ||
<div class='tweet'> | ||
<a href={turl}><img src={t.image_url}></img></a> | ||
<div class='meta'> | ||
<span class="following">{t.followers}</span> | ||
<span class="uname"><a href={turl}>{t.name}</a></span> | ||
<span class="text">{t.text}</span> | ||
</div> | ||
</div> | ||
) | ||
} | ||
|
||
const Tweets = props => { | ||
const [collapsed, setCollapsed] = React.useState(true); | ||
return ( | ||
//Make sure there are tweets. | ||
props.tweets.length > 0 && ( | ||
collapsed ? ( | ||
//Show just the summary statistics. | ||
<div class='rel_tweets_summary' onClick={() => setCollapsed(false)}> | ||
{props.tweets.length + " tweets"} | ||
</div> | ||
) : ( | ||
//Show tweets in expanded view. | ||
<div class='rel_tweets'> | ||
{props.tweets.map((jtweet, ix) => <Tweet key={ix} tweet={jtweet} />)} | ||
</div> | ||
) | ||
) | ||
); | ||
} | ||
|
||
const Paper = props => { | ||
const p = props.paper | ||
const url = p.rel_link + '.full.pdf'; | ||
return ( | ||
<div class={'rel_paper ' + p.rel_site}> | ||
<div class='dllinks'> | ||
<div class='metadata action'><a href={'/sim/' + p.rel_doi}>show similar</a></div> | ||
<div class='metadata action'><a href={url}>pdf</a></div> | ||
<div class='metadata rel_date'>{p.rel_date}</div> | ||
</div> | ||
<div class='rel_title'><a href={p.rel_link}>{p.rel_title}</a></div> | ||
<div class='rel_authors'>{p.rel_authors}</div> | ||
<div class='rel_abs'>{p.rel_abs}</div> | ||
<Tweets tweets={p.tweets} /> | ||
</div> | ||
) | ||
} | ||
|
||
const PaperList = props => { | ||
const lst = props.papers; | ||
const plst = lst.map((jpaper, ix) => <Paper key={ix} paper={jpaper} />); | ||
const msg = { | ||
"latest": "Showing latest papers:", | ||
"sim": 'Showing papers most similar to the first one:', | ||
"search": 'Search results for "' + gvars.search_query + '":' | ||
} | ||
return ( | ||
<div> | ||
<div id="info">{msg[gvars.sort_order]}</div> | ||
<div id="paperList" class="rel_papers"> | ||
{plst} | ||
</div> | ||
</div> | ||
) | ||
} | ||
|
||
ReactDOM.render(<PaperList papers={papers} />, document.getElementById('wrap')); |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
body { | ||
margin: 0; | ||
padding: 0; | ||
font-family: "Gill Sans", "Gill Sans MT", Helvetica, Arial, sans-serif; | ||
font-weight: 400; | ||
background-color: #F6F3E5; | ||
} | ||
|
||
#titdiv { | ||
height: 50px; | ||
background-color: blue; | ||
color: white; | ||
padding-top: 10px; | ||
padding-left: 20px; | ||
padding-bottom: 30px; | ||
border-bottom: 1px solid #540000; | ||
} | ||
h1 { | ||
font-size: 24px; | ||
padding: 0; | ||
margin: 0; | ||
font-weight: 400; | ||
} | ||
#titdiv a { | ||
font-weight: 300; | ||
} | ||
#titdiv a:visited { | ||
color: white; | ||
} | ||
#titdiv a:link { | ||
color: white; | ||
} | ||
|
||
#wrap { | ||
width: 970px; | ||
margin-left: auto; | ||
margin-right: auto; | ||
} | ||
|
||
.rel_paper { | ||
margin: 10px; | ||
padding: 6px; | ||
color: #333; | ||
font-size: 16px; | ||
border: 1px solid #333; | ||
background-color: white; | ||
} | ||
|
||
.rel_paper a { | ||
font-weight: normal; | ||
text-decoration: none; | ||
} | ||
|
||
.rel_authors { | ||
font-weight: 300; | ||
font-size: 14px; | ||
} | ||
|
||
.dllinks { | ||
border-radius: 5px; | ||
float: right; | ||
text-align: right; | ||
} | ||
|
||
.metadata { | ||
font-size: 16px; | ||
padding: 5px; | ||
display: inline-block; | ||
} | ||
.action { | ||
cursor: pointer; | ||
text-decoration: underline; | ||
} | ||
|
||
.rel_date { | ||
color: #900; | ||
} | ||
|
||
.rel_title { | ||
font-size: 18px; | ||
} | ||
|
||
.medrxiv .rel_title a { | ||
color: #0e4c92; | ||
} | ||
|
||
.biorxiv .rel_title a { | ||
color: #bc2635; | ||
} | ||
|
||
.rel_abs { | ||
font-family: Arial, Helvetica, sans-serif; | ||
font-weight: 300; | ||
background-color: #EFE; | ||
padding: 10px; | ||
margin-top: 10px; | ||
border-radius: 5px 5px 0px 0px; | ||
} | ||
|
||
.rel_tweets_summary { | ||
background-color: #EEF; | ||
font-family: Arial, Helvetica, sans-serif; | ||
font-size: 16px; | ||
border-radius: 5px; | ||
padding: 5px; | ||
margin-top: 5px; | ||
cursor: pointer; | ||
text-align: center; | ||
} | ||
|
||
.tweet { | ||
background-color: #EEF; | ||
font-family: Arial, Helvetica, sans-serif; | ||
font-size: 14px; | ||
border-radius: 5px; | ||
padding: 5px; | ||
margin-top: 5px; | ||
min-height: 48px; | ||
} | ||
|
||
.tweet img { | ||
float: left; | ||
margin-right: 5px; | ||
border-radius: 5px; | ||
} | ||
|
||
.tweet .meta { | ||
margin-top: 3px; | ||
} | ||
|
||
.tweet .following { | ||
background-color: #0e4c92; | ||
padding: 3px 5px 3px 5px; | ||
border-radius: 3px 0px 0px 3px; | ||
color: white; | ||
} | ||
|
||
.tweet .uname { | ||
padding: 3px 5px 3px 5px; | ||
background-color: #6a99d4; | ||
color: white; | ||
border-radius: 0px 3px 3px 0px; | ||
margin-right: 5px; | ||
} | ||
|
||
.tweet .uname a { | ||
color: white; | ||
} | ||
|
||
.tweet .text { | ||
line-height: 18px; | ||
} | ||
|
||
|
||
#info { | ||
background-color: #EEF; | ||
padding: 10px; | ||
margin: 10px; | ||
border: 1px solid #CCD; | ||
color: #005; | ||
font-weight: 300; | ||
} | ||
|
||
#sbox { | ||
margin-top: 10px; | ||
text-align: center; | ||
} | ||
|
||
/* Search input: wide text field with a magnifier icon on the left. */
#qfield {
  width: 920px;
  /* A preceding `border: 5px solid white` was fully overridden by this
     declaration and has been removed. */
  border: solid 1px #999;

  height: 40px;
  font-size: 22px;
  /* Colour values must not be quoted; the quoted form was discarded by
     the browser as invalid. */
  color: #333;

  /* Leave room for the background icon. */
  padding-left: 50px;

  background-image: url('/styles/search.png');
  background-repeat: no-repeat;
  background-position: left center;
  outline: 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> | ||
<link rel="stylesheet" href="{{url_for('styles', filename='style.css')}}" type="text/css" > | ||
<title>TalkToMedPapers</title> | ||
<script> | ||
var papers = {{ papers | tojson}}; | ||
var gvars = {{ gvars | tojson}}; | ||
</script> | ||
</head> | ||
<body> | ||
<div id="home"> | ||
<h1>TalkToMedPapers</h1> | ||
|
||
</div> | ||
|
||
|
||
<!-- search box --> | ||
<div id="sbox"> | ||
<form action="/search" method="get"> | ||
<input name="q" type="text" id="qfield" value="{{ gvars.search_query }}"> | ||
</form> | ||
</div> | ||
|
||
|
||
|
||
<div id="wrap"> | ||
</div> | ||
|
||
<!-- React --> | ||
<script src="https://unpkg.com/react@16/umd/react.production.min.js" crossorigin></script> | ||
<script src="https://unpkg.com/react-dom@16/umd/react-dom.production.min.js" crossorigin></script> | ||
<!-- Babel for displaying JSX --> | ||
<script src="https://unpkg.com/babel-standalone@6/babel.min.js"></script> | ||
<!-- Load our React component --> | ||
<script src="{{ url_for('styles', filename='paper_list.js') }}" type="text/babel"></script> | ||
|
||
</body> | ||
</html> |