Commit
refactored notebooks closes #9
chrisPiemonte committed Feb 15, 2019
1 parent eae2e3b commit d68f156
Showing 12 changed files with 132 additions and 127 deletions.
28 changes: 14 additions & 14 deletions notebooks/reddit/api/1.tutorial.ipynb
@@ -20,8 +20,8 @@
"import praw\n",
"import sys, os\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.common_utils import *\n",
"from argminer.utils.reddit_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"from argonaut.utils.reddit_utils import *\n",
"\n",
"credentials = Credentials('../../../res/credentials.yml')\n",
"credentials.reddit['USERNAME']"
@@ -159,17 +159,17 @@
"ID: 98vj9e\n",
"URL: https://www.reddit.com/r/redditdev/comments/98vj9e/please_be_a_good_bot_citizen_of_reddit/\n",
"TITLE: Please be a good 'bot' citizen of reddit\n",
"SCORE: 106\n",
"SCORE: 109\n",
"\n",
"ID: anfip9\n",
"URL: https://www.reddit.com/r/redditdev/comments/anfip9/script_application_getting_invalid_grant_when_it/\n",
"TITLE: Script application getting invalid_grant when it shouldn't(?)\n",
"SCORE: 1\n",
"ID: aqrwve\n",
"URL: https://www.reddit.com/r/redditdev/comments/aqrwve/getting_refresh_token_unauthorized_error/\n",
"TITLE: getting refresh token (unauthorized error)\n",
"SCORE: 7\n",
"\n",
"ID: ancyve\n",
"URL: https://www.reddit.com/r/redditdev/comments/ancyve/catching_wild_exceptions_in_python_27/\n",
"TITLE: Catching *wild* Exceptions in Python 2.7\n",
"SCORE: 1\n",
"ID: aqknk3\n",
"URL: https://www.reddit.com/r/redditdev/comments/aqknk3/wikipagerevision_date_inconsistency/\n",
"TITLE: wikipage.revision_date inconsistency\n",
"SCORE: 9\n",
"\n"
]
}
@@ -214,7 +214,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"REDDITOR NAME: KurMike\n",
"REDDITOR NAME: sqrayper\n",
"REDDITOR KARMA: 3075\n"
]
}
@@ -250,8 +250,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"NUMBER OF TOP LEVEL COMMENTS: 2\n",
"NUMBER OF ALL COMMENTS: 8\n"
"NUMBER OF TOP LEVEL COMMENTS: 1\n",
"NUMBER OF ALL COMMENTS: 3\n"
]
}
],
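The renamed argonaut helpers wrap a stock PRAW client. A minimal sketch of how the notebook's Credentials object might feed praw.Reddit; apart from USERNAME, the key names under credentials.reddit are assumptions, not shown in this diff:

    import praw

    # Hypothetical credential keys; only USERNAME appears in the notebook.
    reddit = praw.Reddit(
        client_id=credentials.reddit['CLIENT_ID'],
        client_secret=credentials.reddit['CLIENT_SECRET'],
        username=credentials.reddit['USERNAME'],
        password=credentials.reddit['PASSWORD'],
        user_agent='argonaut tutorial')

    # Reproduce the comment counts printed above for one submission.
    submission = reddit.submission(id='aqknk3')
    submission.comments.replace_more(limit=None)  # expand "load more comments" stubs
    print('NUMBER OF TOP LEVEL COMMENTS:', len(submission.comments))
    print('NUMBER OF ALL COMMENTS:', len(submission.comments.list()))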
14 changes: 7 additions & 7 deletions notebooks/reddit/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions notebooks/reddit/api/3.user-approach.ipynb

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions notebooks/stackoverflow/api/1.tutorial.ipynb
@@ -8,13 +8,14 @@
"source": [
"import sys, os\n",
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
"from stackapi import StackAPI\n",
"from bs4 import BeautifulSoup\n",
"import matplotlib.pyplot as plt\n",
"\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.common_utils import *\n",
"from argminer.utils.stack_utils import *"
"from argonaut.utils.stack_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"from argonaut.argumentation.mine.from_stack import *"
]
},
{
@@ -50,9 +51,13 @@
"metadata": {},
"outputs": [],
"source": [
"questions_url = 'questions'\n",
"answers_to = 'questions/%s/answers'\n",
"comments_to = 'answers/%s/comments'"
"questions_request_url = 'questions'\n",
"question_request_url = 'questions/%s'\n",
"answers_request_to = 'questions/%s/answers'\n",
"comments_request_to = 'answers/%s/comments'\n",
"\n",
"question_id = 'aqrse4'\n",
"question_url = question_request_url % question_id"
]
},
{
@@ -73,7 +78,7 @@
"site.page_size = 5 # get only 2 questions\n",
"site.max_pages = 1 # for each request (one in this case)\n",
"\n",
"questions = get_questions(num_questions=5, site=site)"
"questions = get_questions(questions_request_url, site=site)"
]
}
],
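The refactor routes question fetching through argonaut.argumentation.mine.from_stack, but the notebook only shows the call site. A hedged sketch of what get_questions could look like on top of StackAPI; the committed helper's body may differ:

    from stackapi import StackAPI

    site = StackAPI('stackoverflow')
    site.page_size = 5  # questions per page
    site.max_pages = 1  # pages per fetch

    # Hypothetical body for get_questions; the real version lives in
    # argonaut.argumentation.mine.from_stack.
    def get_questions(request_url, site):
        response = site.fetch(request_url)  # request_url is e.g. 'questions'
        return response['items']

    questions = get_questions('questions', site=site)
    # The other endpoint templates work the same way:
    answers = site.fetch('questions/%s/answers' % questions[0]['question_id'])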
27 changes: 17 additions & 10 deletions notebooks/stackoverflow/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

33 changes: 13 additions & 20 deletions notebooks/stackoverflow/api/3.user-approach.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions notebooks/twitter/api/1.tutorial.ipynb
@@ -9,8 +9,8 @@
"import tweepy, os, sys\n",
"\n",
"sys.path.append(os.path.abspath(\"../../../src/\"))\n",
"from argminer.utils.twitter_utils import *\n",
"from argminer.utils.common_utils import *\n",
"from argonaut.utils.twitter_utils import *\n",
"from argonaut.utils.common_utils import *\n",
"\n",
"credentials = Credentials('../../../res/credentials.yml')"
]
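The Twitter notebook follows the same pattern with Tweepy. A sketch under assumptions: the key names under credentials.twitter are hypothetical, and api.search reflects the Tweepy API of that era:

    import tweepy

    # Hypothetical credential keys; the YAML layout is not shown in this diff.
    auth = tweepy.OAuthHandler(credentials.twitter['CONSUMER_KEY'],
                               credentials.twitter['CONSUMER_SECRET'])
    auth.set_access_token(credentials.twitter['ACCESS_TOKEN'],
                          credentials.twitter['ACCESS_TOKEN_SECRET'])
    api = tweepy.API(auth)

    # A query search is the entry point for the comment-approach notebook.
    for tweet in api.search(q='some debated topic', count=10):
        print(tweet.user.screen_name, '->', tweet.text[:80])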
33 changes: 17 additions & 16 deletions notebooks/twitter/api/2.comment-approach.ipynb

Large diffs are not rendered by default.

28 changes: 16 additions & 12 deletions notebooks/twitter/api/3.user-approach.ipynb

Large diffs are not rendered by default.

12 changes: 5 additions & 7 deletions src/argonaut/argumentation/convert/to_prolog.py
@@ -2,7 +2,6 @@
from argonaut.argumentation.convert import common
from argonaut.argumentation.convert.frameworks import bwaf, waf, baf, af

-# ACCEPTED_FRAMEWORKS = ['af', 'baf', 'waf', 'bwaf']

def node_to_argument(node):
return f'argument({str(node)}).'
@@ -24,15 +23,15 @@ def edge_to_relationship(source, dest, weight, framework=common.BWAF, n_decimal=
def edge_to_rel_weight(source, dest, weight, framework=common.BWAF, n_decimal=2):
assert framework in common.ACCEPTED_FRAMEWORKS
weight = round(weight, n_decimal)
-rel_weight = ''
+rel_weight = ''
if framework == common.BWAF:
-rel_weight = bwaf.edge_to_rel_weight(source, dest, weight)
+rel_weight = bwaf.edge_to_rel_weight(source, dest, weight)
elif framework == common.BAF:
-rel_weight = baf.edge_to_rel_weight(source, dest, weight)
+rel_weight = baf.edge_to_rel_weight(source, dest, weight)
elif framework == common.WAF:
-rel_weight = waf.edge_to_rel_weight(source, dest, weight)
+rel_weight = waf.edge_to_rel_weight(source, dest, weight)
elif framework == common.AF:
-rel_weight = af.edge_to_rel_weight(source, dest, weight)
+rel_weight = af.edge_to_rel_weight(source, dest, weight)
return rel_weight

def to_facts(Graph, framework=common.BWAF, n_decimal=2, verbose=False):
@@ -72,7 +71,6 @@ def to_facts(Graph, framework=common.BWAF, n_decimal=2, verbose=False):
common.remove_blanks(relationships_set)
common.remove_blanks(rel_weights_set)
if verbose:
-
print(f'MINED {framework} FROM GRAPH.')
print(f'MINED {len(arguments_set)} ARGUMENTS.')
print(f"""MINED {len(relationships_set)} RELATIONSHIPS
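to_prolog turns a debate graph into Prolog facts: node_to_argument emits argument/1 terms and edge_to_rel_weight dispatches to the chosen framework. A toy run; the exact rel_weight fact syntax is an assumption, since the frameworks modules are not part of this diff:

    import networkx as nx
    from argonaut.argumentation.convert import to_prolog

    # Toy debate graph: a negatively weighted edge reads as an attack.
    G = nx.DiGraph()
    G.add_edge('a', 'b', weight=-0.73)

    print(to_prolog.node_to_argument('a'))  # -> argument(a).
    # For BWAF one would expect a weighted relationship fact per edge,
    # e.g. rel_weight(a, b, -0.73) -- illustrative syntax only, defined in
    # argonaut.argumentation.convert.frameworks.bwaf.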
29 changes: 12 additions & 17 deletions src/argonaut/mine.py
@@ -18,20 +18,17 @@
@click.option(
'--source',
'-s',
-prompt='Where do you want to extract argument from\nSources available twitter | reddit | stackoverflow:',
-help='Argumentation minin source')
+prompt='Where do you want to extract argument from \n Sources available twitter | reddit | stackoverflow:',
+help='Argumentation Mining source. Sources available twitter | reddit | stackoverflow')

# REDDIT
# prompt="ID of the reddit submission you want to mine (you can find it in the URL):",
@click.option( '--submission_id', default=None, help='Reddit Submission ID')
@click.option('--submission_id', default=None, help='Reddit Submission ID (you can find it in the URL)')

# TWITTER
# prompt="Twitter query / topic you want to mine:",
@click.option('--query', default=None, help='Twitter Query')

# STACKOVERFLOW
# prompt="StackOverflow question ID (you can find it in the URL):",
@click.option('--question_id', default=None, help='StackOverflow Question')
@click.option('--question_id', default=None, help='StackOverflow Question ID (you can find it in the URL)')

# USEFUL PARAMS
@click.option(
@@ -43,19 +40,19 @@
@click.option(
'--framework',
'-f',
-default='bwaf',
+default=BWAF,
prompt='Frameworks available: bwaf | baf | waf | af: DEFAULT ->',
help='Argumentation Framework.')

# DON'T CARE PARAMS, LEAVE DEFAULT
-@click.option('--not_save', is_flag=True, help='If True it saves the results.')
+@click.option('--no_save', is_flag=True, help='If True it saves the results.')
@click.option('--path', '-p', default=None, help='Path where to save the results.')
@click.option('--multiedges', is_flag=True, help='If True, keeps multiedges.')
@click.option('--num_decimal', default=2, help='Number of decimals.')
@click.option('--verbose', '-v', is_flag=True, help='Oh come on.')

def mine_arguments(source, submission_id, query, question_id, mode,
-framework, not_save, path, multiedges, num_decimal, verbose):
+framework, no_save, path, multiedges, num_decimal, verbose):
assert mode in ACCEPTED_MODES, 'NOT VALID MINING METHOD'
assert framework in ACCEPTED_FRAMEWORKS, 'NOT VALID FRAMEWORK'
if verbose:
@@ -65,43 +62,41 @@ def mine_arguments(source, submission_id, query, question_id, mode,
Graph = from_reddit.get_debate_graph(
submissionId=submission_id,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-print(source, submission_id, mode, not not_save, path, multiedges, framework, num_decimal, verbose)
-
elif source.lower() == TWITTER:
assert query is not None, 'TWITTER QUERY NOT PRESENT'
Graph = from_twitter.get_debate_graph(
query=query,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-
elif source.lower() == STACKOVERFLOW:
assert question_id is not None, 'STACKOVERFLOW QUESTION ID NOT PRESENT'
Graph = from_stack.get_debate_graph(
question=submission_id,
mode=mode,
-save=not not_save,
+save=not no_save,
path=path,
multiedges=multiedges,
framework=framework,
n_decimal=num_decimal,
verbose=verbose
)
-
else:
raise(SourceNotValidException('SOURCE NOT VALID'))
+# print(source, submission_id, mode, not no_save, path, multiedges, framework, num_decimal, verbose)
print('. . . END MINING', '\n')
+
if __name__ == '__main__':
mine_arguments()
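Because mine_arguments is a Click command, it can be exercised without a shell via Click's test runner. A sketch; the --mode option name and its values are assumed from the function signature, and the submission ID is a placeholder:

    from click.testing import CliRunner
    from argonaut.mine import mine_arguments

    runner = CliRunner()
    result = runner.invoke(mine_arguments, [
        '--source', 'reddit',
        '--submission_id', '9c0f4d',   # placeholder ID
        '--mode', 'comment',           # assumed ACCEPTED_MODES value
        '--framework', 'bwaf',
        '--verbose',
    ])
    print(result.output)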
18 changes: 10 additions & 8 deletions src/argonaut/text/TextAnalyzer.py
@@ -6,6 +6,8 @@
import argonaut.utils.common_utils as utils
from nltk.sentiment.vader import SentimentIntensityAnalyzer

+verbose = False
+
def __get_model(path, url, verbose=False):
if path.is_file():
if verbose:
@@ -19,18 +21,18 @@ def __get_model(path, url, verbose=False):
return gensim.models.KeyedVectors.load_word2vec_format(str(path), binary=True, limit=50000)

sia = SentimentIntensityAnalyzer()
-model = __get_model(utils.W2V_GOOGLENEWS_MODEL_PATH, utils.W2V_GOOGLENEWS_MODEL_URL, verbose=False)
+model = __get_model(utils.W2V_GOOGLENEWS_MODEL_PATH, utils.W2V_GOOGLENEWS_MODEL_URL, verbose=verbose)

-def get_sentiment(text):
-return sia.polarity_scores(text)['compound']
+def get_sentiment(sentence):
+return sia.polarity_scores(sentence)['compound']

-def get_similarity(text, other_text):
-text_avg_vector = __avg_sentence_vector(text.split(), model=model)
-other_text_avg_vector = __avg_sentence_vector(other_text.split(), model=model)
+def get_similarity(sentence, other_sentence):
+sentence_avg_vector = __avg_sentence_vector(sentence.split(), model=model)
+other_sentence_avg_vector = __avg_sentence_vector(other_sentence.split(), model=model)
similarity = 0.001
# if both are non all zeroes vectors
-if not(__is_all_zeroes(text_avg_vector) or __is_all_zeroes(other_text_avg_vector)):
-similarity = __cosine_similarity(text_avg_vector, other_text_avg_vector)
+if not(__is_all_zeroes(sentence_avg_vector) or __is_all_zeroes(other_sentence_avg_vector)):
+similarity = __cosine_similarity(sentence_avg_vector, other_sentence_avg_vector)
return similarity

# function to average all words vectors in a given sentence
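The diff references three private helpers that stay collapsed: __avg_sentence_vector, __is_all_zeroes, and __cosine_similarity. A plausible implementation consistent with the call sites above (a sketch, not the committed code; the 300-dimension default matches the GoogleNews word2vec model):

    import numpy as np

    # Average the word2vec vectors of all in-vocabulary words in a sentence.
    def __avg_sentence_vector(words, model, num_features=300):
        feature_vec = np.zeros(num_features, dtype='float32')
        n_words = 0
        for word in words:
            if word in model:  # skip out-of-vocabulary tokens
                feature_vec += model[word]
                n_words += 1
        return feature_vec / n_words if n_words else feature_vec

    def __is_all_zeroes(vector):
        return not np.any(vector)

    def __cosine_similarity(vec, other_vec):
        return float(np.dot(vec, other_vec) /
                     (np.linalg.norm(vec) * np.linalg.norm(other_vec)))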
