Commit
refactored notebooks closes #9
chrisPiemonte committed Feb 15, 2019
1 parent eae2e3b commit d68f156
Showing 12 changed files with 132 additions and 127 deletions.
28 changes: 14 additions & 14 deletions notebooks/reddit/api/1.tutorial.ipynb
@@ -20,8 +20,8 @@
"import praw\n",
"import sys, os\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.common_utils import *\n",
"from argminer.utils.reddit_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"from argonaut.utils.reddit_utils import *\n",
"\n",
"credentials = Credentials('../../../res/credentials.yml')\n",
"credentials.reddit['USERNAME']"
@@ -159,17 +159,17 @@
"ID: 98vj9e\n",
"URL: https://www.reddit.com/r/redditdev/comments/98vj9e/please_be_a_good_bot_citizen_of_reddit/\n",
"TITLE: Please be a good 'bot' citizen of reddit\n",
"SCORE: 106\n",
"SCORE: 109\n",
"\n",
"ID: anfip9\n",
"URL: https://www.reddit.com/r/redditdev/comments/anfip9/script_application_getting_invalid_grant_when_it/\n",
"TITLE: Script application getting invalid_grant when it shouldn't(?)\n",
"SCORE: 1\n",
"ID: aqrwve\n",
"URL: https://www.reddit.com/r/redditdev/comments/aqrwve/getting_refresh_token_unauthorized_error/\n",
"TITLE: getting refresh token (unauthorized error)\n",
"SCORE: 7\n",
"\n",
"ID: ancyve\n",
"URL: https://www.reddit.com/r/redditdev/comments/ancyve/catching_wild_exceptions_in_python_27/\n",
"TITLE: Catching *wild* Exceptions in Python 2.7\n",
"SCORE: 1\n",
"ID: aqknk3\n",
"URL: https://www.reddit.com/r/redditdev/comments/aqknk3/wikipagerevision_date_inconsistency/\n",
"TITLE: wikipage.revision_date inconsistency\n",
"SCORE: 9\n",
"\n"
]
}
@@ -214,7 +214,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"REDDITOR NAME: KurMike\n",
"REDDITOR NAME: sqrayper\n",
"REDDITOR KARMA: 3075\n"
]
}
@@ -250,8 +250,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"NUMBER OF TOP LEVEL COMMENTS: 2\n",
"NUMBER OF ALL COMMENTS: 8\n"
"NUMBER OF TOP LEVEL COMMENTS: 1\n",
"NUMBER OF ALL COMMENTS: 3\n"
]
}
],
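The renamed argonaut helpers wrap a stock PRAW client. A minimal sketch of how the notebook's Credentials object might feed praw.Reddit; apart from USERNAME, the key names under credentials.reddit are assumptions, not shown in this diff:

    import praw

    # Hypothetical credential keys; only USERNAME appears in the notebook.
    reddit = praw.Reddit(
        client_id=credentials.reddit['CLIENT_ID'],
        client_secret=credentials.reddit['CLIENT_SECRET'],
        username=credentials.reddit['USERNAME'],
        password=credentials.reddit['PASSWORD'],
        user_agent='argonaut tutorial')

    # Reproduce the comment counts printed above for one submission.
    submission = reddit.submission(id='aqknk3')
    submission.comments.replace_more(limit=None)  # expand "load more comments" stubs
    print('NUMBER OF TOP LEVEL COMMENTS:', len(submission.comments))
    print('NUMBER OF ALL COMMENTS:', len(submission.comments.list()))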
14 changes: 7 additions & 7 deletions notebooks/reddit/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions notebooks/reddit/api/3.user-approach.ipynb

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions notebooks/stackoverflow/api/1.tutorial.ipynb
@@ -8,13 +8,14 @@
"source": [
"import sys, os\n",
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
"from stackapi import StackAPI\n",
"from bs4 import BeautifulSoup\n",
"import matplotlib.pyplot as plt\n",
"\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.common_utils import *\n",
"from argminer.utils.stack_utils import *"
"from argonaut.utils.stack_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"from argonaut.argumentation.mine.from_stack import *"
]
},
{
@@ -50,9 +51,13 @@
"metadata": {},
"outputs": [],
"source": [
"questions_url = 'questions'\n",
"answers_to = 'questions/%s/answers'\n",
"comments_to = 'answers/%s/comments'"
"questions_request_url = 'questions'\n",
"question_request_url = 'questions/%s'\n",
"answers_request_to = 'questions/%s/answers'\n",
"comments_request_to = 'answers/%s/comments'\n",
"\n",
"question_id = 'aqrse4'\n",
"question_url = question_request_url % question_id"
]
},
{
@@ -73,7 +78,7 @@
"site.page_size = 5 # get only 2 questions\n",
"site.max_pages = 1 # for each request (one in this case)\n",
"\n",
"questions = get_questions(num_questions=5, site=site)"
"questions = get_questions(questions_request_url, site=site)"
]
}
],
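The refactor routes question fetching through argonaut.argumentation.mine.from_stack, but the notebook only shows the call site. A hedged sketch of what get_questions could look like on top of StackAPI; the committed helper's body may differ:

    from stackapi import StackAPI

    site = StackAPI('stackoverflow')
    site.page_size = 5  # questions per page
    site.max_pages = 1  # pages per fetch

    # Hypothetical body for get_questions; the real version lives in
    # argonaut.argumentation.mine.from_stack.
    def get_questions(request_url, site):
        response = site.fetch(request_url)  # request_url is e.g. 'questions'
        return response['items']

    questions = get_questions('questions', site=site)
    # The other endpoint templates work the same way:
    answers = site.fetch('questions/%s/answers' % questions[0]['question_id'])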
27 changes: 17 additions & 10 deletions notebooks/stackoverflow/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

33 changes: 13 additions & 20 deletions notebooks/stackoverflow/api/3.user-approach.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions notebooks/twitter/api/1.tutorial.ipynb
@@ -9,8 +9,8 @@
"import tweepy, os, sys\n",
"\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.twitter_utils import *\n",
"from argminer.utils.common_utils import *\n",
"from argonaut.utils.twitter_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"\n",
"credentials = Credentials('../../../res/credentials.yml')"
]
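The Twitter notebook follows the same pattern with Tweepy. A sketch under assumptions: the key names under credentials.twitter are hypothetical, and api.search reflects the Tweepy API of that era:

    import tweepy

    # Hypothetical credential keys; the YAML layout is not shown in this diff.
    auth = tweepy.OAuthHandler(credentials.twitter['CONSUMER_KEY'],
                               credentials.twitter['CONSUMER_SECRET'])
    auth.set_access_token(credentials.twitter['ACCESS_TOKEN'],
                          credentials.twitter['ACCESS_TOKEN_SECRET'])
    api = tweepy.API(auth)

    # A query search is the entry point for the comment-approach notebook.
    for tweet in api.search(q='some debated topic', count=10):
        print(tweet.user.screen_name, '->', tweet.text[:80])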
33 changes: 17 additions & 16 deletions notebooks/twitter/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

28 changes: 16 additions & 12 deletions notebooks/twitter/api/3.user-approach.ipynb

Large diffs are not rendered by default.

12 changes: 5 additions & 7 deletions src/argonaut/argumentation/convert/to_prolog.py
@@ -2,7 +2,6 @@
from argonaut.argumentation.convert import common
from argonaut.argumentation.convert.frameworks import bwaf, waf, baf, af

-# ACCEPTED_FRAMEWORKS = ['af', 'baf', 'waf', 'bwaf']

def node_to_argument(node):
return f'argument({str(node)}).'
@@ -24,15 +23,15 @@ def edge_to_relationship(source, dest, weight, framework=common.BWAF, n_decimal=
def edge_to_rel_weight(source, dest, weight, framework=common.BWAF, n_decimal=2):
assert framework in common.ACCEPTED_FRAMEWORKS
weight = round(weight, n_decimal)
-rel_weight = ''
+rel_weight = ''
if framework == common.BWAF:
-rel_weight = bwaf.edge_to_rel_weight(source, dest, weight)
+rel_weight = bwaf.edge_to_rel_weight(source, dest, weight)
elif framework == common.BAF:
-rel_weight = baf.edge_to_rel_weight(source, dest, weight)
+rel_weight = baf.edge_to_rel_weight(source, dest, weight)
elif framework == common.WAF:
-rel_weight = waf.edge_to_rel_weight(source, dest, weight)
+rel_weight = waf.edge_to_rel_weight(source, dest, weight)
elif framework == common.AF:
-rel_weight = af.edge_to_rel_weight(source, dest, weight)
+rel_weight = af.edge_to_rel_weight(source, dest, weight)
return rel_weight

def to_facts(Graph, framework=common.BWAF, n_decimal=2, verbose=False):
@@ -72,7 +71,6 @@ def to_facts(Graph, framework=common.BWAF, n_decimal=2, verbose=False):
common.remove_blanks(relationships_set)
common.remove_blanks(rel_weights_set)
if verbose:
-
print(f'MINED {framework} FROM GRAPH.')
print(f'MINED {len(arguments_set)} ARGUMENTS.')
print(f"""MINED {len(relationships_set)} RELATIONSHIPS
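to_prolog turns a debate graph into Prolog facts: node_to_argument emits argument/1 terms and edge_to_rel_weight dispatches to the chosen framework. A toy run; the exact rel_weight fact syntax is an assumption, since the frameworks modules are not part of this diff:

    import networkx as nx
    from argonaut.argumentation.convert import to_prolog

    # Toy debate graph: a negatively weighted edge reads as an attack.
    G = nx.DiGraph()
    G.add_edge('a', 'b', weight=-0.73)

    print(to_prolog.node_to_argument('a'))  # -> argument(a).
    # For BWAF one would expect a weighted relationship fact per edge,
    # e.g. rel_weight(a, b, -0.73) -- illustrative syntax only, defined in
    # argonaut.argumentation.convert.frameworks.bwaf.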
29 changes: 12 additions & 17 deletions src/argonaut/mine.py
@@ -18,20 +18,17 @@
@click.option(
'--source',
'-s',
-prompt='Where do you want to extract argument from\nSources available twitter | reddit | stackoverflow:',
-help='Argumentation minin source')
+prompt='Where do you want to extract argument from \n Sources available twitter | reddit | stackoverflow:',
+help='Argumentation Mining source. Sources available twitter | reddit | stackoverflow')

# REDDIT
# prompt="ID of the reddit submission you want to mine (you can find it in the URL):",
@click.option( '--submission_id', default=None, help='Reddit Submission ID')
@click.option('--submission_id', default=None, help='Reddit Submission ID (you can find it in the URL)')

# TWITTER
# prompt="Twitter query / topic you want to mine:",
@click.option('--query', default=None, help='Twitter Query')

# STACKOVERFLOW
# prompt="StackOverflow question ID (you can find it in the URL):",
@click.option('--question_id', default=None, help='StackOverflow Question')
@click.option('--question_id', default=None, help='StackOverflow Question ID (you can find it in the URL)')

# USEFUL PARAMS
@click.option(
@@ -43,19 +40,19 @@
@click.option(
'--framework',
'-f',
-default='bwaf',
+default=BWAF,
prompt='Frameworks available: bwaf | baf | waf | af: DEFAULT ->',
help='Argumentation Framework.')

# DON'T CARE PARAMS, LEAVE DEFAULT
-@click.option('--not_save', is_flag=True, help='If True it saves the results.')
+@click.option('--no_save', is_flag=True, help='If True it saves the results.')
@click.option('--path', '-p', default=None, help='Path where to save the results.')
@click.option('--multiedges', is_flag=True, help='If True, keeps multiedges.')
@click.option('--num_decimal', default=2, help='Number of decimals.')
@click.option('--verbose', '-v', is_flag=True, help='Oh come on.')

def mine_arguments(source, submission_id, query, question_id, mode,
-framework, not_save, path, multiedges, num_decimal, verbose):
+framework, no_save, path, multiedges, num_decimal, verbose):
assert mode in ACCEPTED_MODES, 'NOT VALID MINING METHOD'
assert framework in ACCEPTED_FRAMEWORKS, 'NOT VALID FRAMEWORK'
if verbose:
@@ -65,43 +62,41 @@ def mine_arguments(source, submission_id, query, question_id, mode,
Graph = from_reddit.get_debate_graph(
submissionId=submission_id,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-print(source, submission_id, mode, not not_save, path, multiedges, framework, num_decimal, verbose)
-
elif source.lower() == TWITTER:
assert query is not None, 'TWITTER QUERY NOT PRESENT'
Graph = from_twitter.get_debate_graph(
query=query,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-
elif source.lower() == STACKOVERFLOW:
assert question_id is not None, 'STACKOVERFLOW QUESTION ID NOT PRESENT'
Graph = from_stack.get_debate_graph(
question=submission_id,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-
else:
raise(SourceNotValidException('SOURCE NOT VALID'))
+# print(source, submission_id, mode, not no_save, path, multiedges, framework, num_decimal, verbose)
print('. . . END MINING', '\n')
+
if __name__ == '__main__':
mine_arguments()
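Because mine_arguments is a Click command, it can be exercised without a shell via Click's test runner. A sketch; the --mode option name and its values are assumed from the function signature, and the submission ID is a placeholder:

    from click.testing import CliRunner
    from argonaut.mine import mine_arguments

    runner = CliRunner()
    result = runner.invoke(mine_arguments, [
        '--source', 'reddit',
        '--submission_id', '9c0f4d',   # placeholder ID
        '--mode', 'comment',           # assumed ACCEPTED_MODES value
        '--framework', 'bwaf',
        '--verbose',
    ])
    print(result.output)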
18 changes: 10 additions & 8 deletions src/argonaut/text/TextAnalyzer.py
@@ -6,6 +6,8 @@
import argonaut.utils.common_utils as utils
from nltk.sentiment.vader import SentimentIntensityAnalyzer

+verbose = False
+
def __get_model(path, url, verbose=False):
if path.is_file():
if verbose:
@@ -19,18 +21,18 @@ def __get_model(path, url, verbose=False):
return gensim.models.KeyedVectors.load_word2vec_format(str(path), binary=True, limit=50000)

sia = SentimentIntensityAnalyzer()
-model = __get_model(utils.W2V_GOOGLENEWS_MODEL_PATH, utils.W2V_GOOGLENEWS_MODEL_URL, verbose=False)
+model = __get_model(utils.W2V_GOOGLENEWS_MODEL_PATH, utils.W2V_GOOGLENEWS_MODEL_URL, verbose=verbose)

-def get_sentiment(text):
-return sia.polarity_scores(text)['compound']
+def get_sentiment(sentence):
+return sia.polarity_scores(sentence)['compound']

-def get_similarity(text, other_text):
-text_avg_vector = __avg_sentence_vector(text.split(), model=model)
-other_text_avg_vector = __avg_sentence_vector(other_text.split(), model=model)
+def get_similarity(sentence, other_sentence):
+sentence_avg_vector = __avg_sentence_vector(sentence.split(), model=model)
+other_sentence_avg_vector = __avg_sentence_vector(other_sentence.split(), model=model)
similarity = 0.001
# if both are non all zeroes vectors
-if not(__is_all_zeroes(text_avg_vector) or __is_all_zeroes(other_text_avg_vector)):
-similarity = __cosine_similarity(text_avg_vector, other_text_avg_vector)
+if not(__is_all_zeroes(sentence_avg_vector) or __is_all_zeroes(other_sentence_avg_vector)):
+similarity = __cosine_similarity(sentence_avg_vector, other_sentence_avg_vector)
return similarity

# function to average all words vectors in a given sentence
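The diff references three private helpers that stay collapsed: __avg_sentence_vector, __is_all_zeroes, and __cosine_similarity. A plausible implementation consistent with the call sites above (a sketch, not the committed code; the 300-dimension default matches the GoogleNews word2vec model):

    import numpy as np

    # Average the word2vec vectors of all in-vocabulary words in a sentence.
    def __avg_sentence_vector(words, model, num_features=300):
        feature_vec = np.zeros(num_features, dtype='float32')
        n_words = 0
        for word in words:
            if word in model:  # skip out-of-vocabulary tokens
                feature_vec += model[word]
                n_words += 1
        return feature_vec / n_words if n_words else feature_vec

    def __is_all_zeroes(vector):
        return not np.any(vector)

    def __cosine_similarity(vec, other_vec):
        return float(np.dot(vec, other_vec) /
                     (np.linalg.norm(vec) * np.linalg.norm(other_vec)))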
