-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
194 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Copyright 2017 Hussein S. Al-Olimat, hussein@knoesis.org\n", | ||
"\n", | ||
"This software is released under the GNU Affero General Public License (AGPL) v3.0." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# pytest is an example use case for LNEx in Python 3" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"!pip install wordsegment\n", | ||
"!pip install shapely\n", | ||
"!pip install nltk\n", | ||
"!pip install elasticsearch\n", | ||
"!pip install elasticsearch_dsl\n", | ||
"!pip install geopy" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import json, re\n", | ||
"from shapely.geometry import MultiPoint\n", | ||
"\n", | ||
"import sys \n", | ||
"sys.path.append(\"LNEx\")\n", | ||
"import LNEx as lnex" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def read_tweets():\n", | ||
" tweets_file = \"_Data/sample_tweets.txt\"\n", | ||
" # read tweets from file to list\n", | ||
" with open(tweets_file) as f:\n", | ||
" tweets = f.read().splitlines()\n", | ||
" return tweets\n", | ||
"\n", | ||
"def init_using_elasticindex(bb, cache, augmentType, dataset, capital_word_shape):\n", | ||
" lnex.elasticindex(conn_string='localhost:9200', index_name=\"photon\")\n", | ||
"\n", | ||
" geo_info = lnex.initialize( bb, augmentType=augmentType,\n", | ||
" cache=cache,\n", | ||
" dataset_name=dataset,\n", | ||
" capital_word_shape=capital_word_shape)\n", | ||
" return geo_info" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Initializing LNEx ...\n", | ||
"Done Initialization ...\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"bbs = { \"chennai\": [12.74, 80.066986084, 13.2823848224, 80.3464508057],\n", | ||
" \"louisiana\": [29.4563, -93.3453, 31.4521, -89.5276],\n", | ||
" \"houston\": [29.4778611958, -95.975189209, 30.1463147381, -94.8889160156],\n", | ||
" \"columbus\": [39.808631, -83.2102799, 40.1572719, -82.7713781],\n", | ||
" \"test\": [41.6187434973, -83.7106928844, 41.6245055116, -83.7017216664]}\n", | ||
"\n", | ||
"dataset = \"chennai\"\n", | ||
"\n", | ||
"geo_info = init_using_elasticindex(bbs[dataset], cache=False, augmentType=\"HP\", \n", | ||
" dataset=dataset, capital_word_shape=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"tambaram (43, 51) tambaram ['7066', '13478']\n", | ||
"Mudichur (29, 37) mudichur ['3205']\n", | ||
"##################################################\n", | ||
"Jones road (64, 74) jones road ['9569', '6472']\n", | ||
"Saidapet (0, 8) saidapet ['1180', '3771', '11613', '13880', '133', '13201']\n", | ||
"##################################################\n", | ||
"Chennai Central (12, 27) chennai central ['7267', '7347']\n", | ||
"Chennai Egmore (28, 42) chennai egmore ['5346', '7768']\n", | ||
"##################################################\n", | ||
"New Avadi road (20, 34) new avadi road ['2741', '7133', '16966', '16786', '15324', '16791', '8', '14795', '2288']\n", | ||
"Water tank road (39, 54) water tank rd ['5773']\n", | ||
"##################################################\n", | ||
"##################################################\n", | ||
"mambalam (29, 37) mambalam ['12606']\n", | ||
"new avadi rd (8, 20) new avadi road ['2741', '7133', '16966', '16786', '15324', '16791', '8', '14795', '2288']\n", | ||
"chennai (21, 28) chennai ['10301', '10318']\n", | ||
"##################################################\n", | ||
"##################################################\n", | ||
"avadi (4, 9) avadi ['14979', '607']\n", | ||
"##################################################\n", | ||
"pathur (21, 27) pathur ['10359']\n", | ||
"##################################################\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for tweet in read_tweets():\n", | ||
" # remove hashtags, urls, etc...\n", | ||
" tweet = ' '.join(re.sub(\"(#[A-Za-z0-9]+)|([^0-9A-Za-z \\t])|(\\w+:\\/\\/\\S+)\",\" \", tweet).split())\n", | ||
" for output in lnex.extract(tweet):\n", | ||
" print(output[0], output[1], output[2], output[3][\"main\"])\n", | ||
" print(\"#\"*50)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters