Skip to content

Commit

Permalink
working on #19
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisPiemonte committed Feb 21, 2019
1 parent d68f156 commit 3edf4d7
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/
*$py.class

.ipynb_checkpoints
.idea/
*.pyc
*_.ipynb
.DS_Store
Expand Down
40 changes: 40 additions & 0 deletions src/argonaut/argumentation/convert/to_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils


def user_nodes_to_lines(Graph, sep=','):
    """Serialize every node of a user graph to one CSV-style line.

    Each node's 'text' attribute is expected to be an iterable of strings
    (the user-mode miners store a set of comment texts per user); it is
    flattened with list_to_string before formatting.
    NOTE(review): if 'text' is a set, line content order is nondeterministic.

    :param Graph: networkx graph whose nodes carry a 'text' attribute
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per node
    """
    lines = []
    for node_id in Graph.nodes:
        text = list_to_string(Graph.node[node_id].get('text', ''))
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(user_node_to_text(node_id, text, sep=sep))
    return lines

def comment_nodes_to_lines(Graph, sep=','):
    """Serialize every node of a comment graph to one CSV-style line.

    Each node is expected to carry 'text' and 'user' attributes (set by the
    comment-mode miners); missing attributes default to the empty string.

    :param Graph: networkx graph whose nodes carry 'text'/'user' attributes
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per node
    """
    lines = []
    for node_id in Graph.nodes:
        text = Graph.node[node_id].get('text', '')
        user = Graph.node[node_id].get('user', '')
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(comment_node_to_text(node_id, text, user, sep=sep))
    return lines

def edges_to_lines(Graph, sep=','):
    """Serialize every edge of the graph to one CSV-style line.

    Edges without a 'weight' attribute get an empty weight field so the
    column count stays constant across lines.

    :param Graph: networkx graph; edges may carry a 'weight' attribute
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per edge
    """
    lines = []
    for source, dest, data in Graph.edges(data=True):
        weight = str(data['weight']) if 'weight' in data else ''
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(edge_to_text(source, dest, weight, sep=sep))
    return lines



def user_node_to_text(node_id, text, sep=','):
    """Format a user node as a single separated line: ``<node_id><sep><text>``."""
    fields = (node_id, text)
    return sep.join(f'{field}' for field in fields)

def comment_node_to_text(node_id, text, user, sep=','):
    """Format a comment node as ``<node_id><sep><text><sep><user>``."""
    fields = (node_id, text, user)
    return sep.join(f'{field}' for field in fields)

def edge_to_text(source, dest, weight, sep=','):
    """Format an edge as ``<source><sep><dest><sep><weight>``."""
    fields = (source, dest, weight)
    return sep.join(f'{field}' for field in fields)


def list_to_string(l, sep=' --- '):
    """Join the string elements of *l* into one string, separated by *sep*."""
    joined = sep.join(l)
    return joined
1 change: 1 addition & 0 deletions src/argonaut/argumentation/convert/to_prolog.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.convert import common
from argonaut.argumentation.convert.frameworks import bwaf, waf, baf, af
Expand Down
12 changes: 1 addition & 11 deletions src/argonaut/argumentation/mine/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
import matplotlib.pyplot as plt
import argonaut.text.TextAnalyzer
import argonaut.utils.io as io
from argonaut.utils.twitter_utils import *
import argonaut.utils.common_utils as utils
from argonaut.argumentation.convert import common
Expand Down Expand Up @@ -50,17 +51,6 @@ def merge_multiedges(MultiDiGraph):
Graph[u][v]['weight'] /= Graph[u][v]['num']
return Graph

def save_graph(Graph, suffix, path=None, framework=common.BWAF, n_decimal=2, verbose=False):
graph_name = utils.get_graph_name(suffix=suffix)
graph_output_path = Path(utils.INTERIM_DATA_PATH, graph_name) if path is None else path + '_graph.pickle'
utils.pickle_graph(Graph, graph_output_path)
# SAVE PROLOG FACTS
facts = to_prolog.to_facts(Graph, framework=framework, n_decimal=n_decimal, verbose=verbose)
facts_name = utils.get_facts_name(graph_name=graph_name, framework=framework)
facts_output_path = Path(utils.PROLOG_DATA_PATH, facts_name) if path is None else path + '_facts.pl'
utils.save_facts(facts, facts_output_path)
print('Everything saved successfully.', '\n')

def count_nodes(Graph):
return len(set(Graph.nodes()))

Expand Down
18 changes: 16 additions & 2 deletions src/argonaut/argumentation/mine/from_reddit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys, os, praw
import networkx as nx
from bs4 import BeautifulSoup
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.mine.common import *
import argonaut.text.TextAnalyzer as TextAnalyzer
Expand Down Expand Up @@ -30,7 +31,7 @@ def get_debate_graph(submissionId=None, mode='comments', save=True, path=None,
Graph = merge_multiedges(Graph)
if save:
suffix = f'reddit_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand All @@ -55,6 +56,10 @@ def __build_graph_from_comments(comments):
parent_sentiment = TextAnalyzer.get_sentiment(comment.parent_text)
similarity = TextAnalyzer.get_similarity(comment.text, comment.parent_text)
weight = get_edge_weight(similarity, comment_sentiment, parent_sentiment)
# ADD NODES ATTRIBUTES
Graph.add_node(comment.id, text=comment.text, user=comment.user)
Graph.add_node(comment.parent, text=comment.parent_text, user=comment.parent_user)
# ADD EDGE
Graph.add_edge(comment.id, comment.parent, weight=weight)
else:
pass
Expand All @@ -68,7 +73,16 @@ def __build_graph_from_users(comments):
parent_sentiment = TextAnalyzer.get_sentiment(comment.parent_text)
similarity = TextAnalyzer.get_similarity(comment.text, comment.parent_text)
weight = get_edge_weight(similarity, comment_sentiment, parent_sentiment)
# TODO merge edges
# ADD NODES ATTRIBUTES
if comment.user in Graph.node:
Graph.node[comment.user]['text'].add(comment.text)
else:
Graph.add_node(comment.user, text={comment.text})
if comment.parent_user in Graph.node:
Graph.node[comment.parent_user]['text'].add(comment.parent_text)
else:
Graph.add_node(comment.parent_user, text={comment.parent_text})
# ADD EDGE
Graph.add_edge(comment.user, comment.parent_user, weight=weight)
else:
pass
Expand Down
26 changes: 23 additions & 3 deletions src/argonaut/argumentation/mine/from_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import networkx as nx
from random import randint
from stackapi import StackAPI
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.mine.common import *
import argonaut.utils.stack_utils as stack_utils
Expand All @@ -27,7 +28,7 @@ def get_debate_graph(question=None, mode='comments', save=True, path=None,
Graph = merge_multiedges(Graph)
if save:
suffix = f'stack_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand All @@ -44,7 +45,7 @@ def __build_graph_from_comments(questions):
for question in questions:
question_id = stack_utils.get_question_id(question)
question_sentiment = TextAnalyzer.get_sentiment(stack_utils.get_text(question))
Graph.add_node(question_id)
Graph.add_node(question_id, text=stack_utils.get_text(question), user=stack_utils.get_user_id(question))
answers = get_answers(question_id, site=site)

for answer in answers['items']:
Expand All @@ -53,6 +54,8 @@ def __build_graph_from_comments(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(question), stack_utils.get_text(answer))
# compute the weight of the edge
weight = get_edge_weight(similarity, answer_sentiment, question_sentiment)

Graph.add_node(answer_id, text=stack_utils.get_text(answer), user=stack_utils.get_user_id(answer))
Graph.add_edge(answer_id, question_id, weight=weight)
comments = get_comments(answer_id, site=site)

Expand All @@ -62,6 +65,8 @@ def __build_graph_from_comments(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(answer), stack_utils.get_text(comment))
# compute the weight of the edge
weight = get_edge_weight(similarity, comment_sentiment, answer_sentiment)

Graph.add_node(comment_id, text=stack_utils.get_text(comment), user=stack_utils.get_user_id(comment))
Graph.add_edge(comment_id, answer_id, weight=weight)
return Graph

Expand All @@ -76,7 +81,12 @@ def __build_graph_from_users(questions):
question_id = stack_utils.get_question_id(question)
question_user_id = stack_utils.get_user_id(question)
question_sentiment = TextAnalyzer.get_sentiment(stack_utils.get_text(question))
Graph.add_node(question_user_id)

if question_user_id in Graph.node:
Graph.node[question_user_id]['text'].add(stack_utils.get_text(question))
else:
Graph.add_node(question_user_id, text={stack_utils.get_text(question)})
# Graph.add_node(question_user_id)
answers = get_answers(question_id, site=site)

for answer in answers['items']:
Expand All @@ -86,6 +96,11 @@ def __build_graph_from_users(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(answer), stack_utils.get_text(question))
# compute the weight of the edge
weight = get_edge_weight(similarity, answer_sentiment, question_sentiment)

if answer_user_id in Graph.node:
Graph.node[answer_user_id]['text'].add(stack_utils.get_text(answer))
else:
Graph.add_node(answer_user_id, text={stack_utils.get_text(answer)})
Graph.add_edge(answer_user_id, question_user_id, weight=weight)
comments = get_comments(answer_id, site=site)

Expand All @@ -96,6 +111,11 @@ def __build_graph_from_users(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(comment), stack_utils.get_text(answer))
# compute the weight of the edge
weight = get_edge_weight(similarity, comment_sentiment, answer_sentiment)

if tweet.user in Graph.node:
Graph.node[tweet.user]['text'].add(tweet.text)
else:
Graph.add_node(tweet.user, text={tweet.text})
Graph.add_edge(comment_user_id, answer_user_id, weight=weight)
return Graph

Expand Down
22 changes: 18 additions & 4 deletions src/argonaut/argumentation/mine/from_twitter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tweepy
import networkx as nx
import argonaut.utils.io as io
from functools import lru_cache
from argonaut.utils.twitter_utils import *
import argonaut.utils.common_utils as utils
Expand Down Expand Up @@ -37,7 +38,7 @@ def get_debate_graph(query='trump', language='en', mode='comments', save=True, p
Graph = merge_multiedges(Graph)
if save:
suffix = f'twitter_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand Down Expand Up @@ -69,11 +70,15 @@ def __build_graph_from_comments(conversations):
for i, tweet in enumerate(conv):
if tweet.parent is not None:
answered_tweet = conv[i+1]
# COMPUTE THE WEIGHT
similarity = TextAnalyzer.get_similarity(tweet.text, answered_tweet.text)
tweet_sentiment = TextAnalyzer.get_sentiment(tweet.text)
answered_tweet_sentiment = TextAnalyzer.get_sentiment(answered_tweet.text)
# or if the edge already exist change the weight
weight = get_edge_weight(similarity, tweet_sentiment, answered_tweet_sentiment)
# ADD NODES ATTRIBUTES
Graph.add_node(tweet.id, text=tweet.text, user=tweet.user)
Graph.add_node(answered_tweet.id, text=answered_tweet.text, user=answered_tweet.user)
# ADD EDGE
Graph.add_edge(tweet.id, answered_tweet.id, weight=weight)
else:
pass
Expand All @@ -86,12 +91,21 @@ def __build_graph_from_users(conversations):
for i, tweet in enumerate(conv):
if tweet.parent is not None:
answered_tweet = conv[i+1]
# COMPUTE THE WEIGHT
similarity = TextAnalyzer.get_similarity(tweet.text, answered_tweet.text)
tweet_sentiment = TextAnalyzer.get_sentiment(tweet.text)
answered_tweet_sentiment = TextAnalyzer.get_sentiment(answered_tweet.text)
# TODO: or if the edge already exist change the weight
weight = get_edge_weight(similarity, tweet_sentiment, answered_tweet_sentiment)
# TODO: when multiple edges find a way to merge them
# ADD NODES ATTRIBUTES
if tweet.user in Graph.node:
Graph.node[tweet.user]['text'].add(tweet.text)
else:
Graph.add_node(tweet.user, text={tweet.text})
if answered_tweet.user in Graph.node:
Graph.node[answered_tweet.user]['text'].add(answered_tweet.text)
else:
Graph.add_node(answered_tweet.user, text={answered_tweet.text})
# ADD EDGE
Graph.add_edge(tweet.user, answered_tweet.user, weight=weight)
else:
pass
Expand Down
65 changes: 34 additions & 31 deletions src/argonaut/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
PRETRAINED_MODELS_PATH = Path(MODELS_PATH, 'pretrained')
INTERIM_DATA_PATH = Path(DATA_PATH, 'interim')
PROLOG_DATA_PATH = Path(DATA_PATH, 'prolog')
OUTPUT_DATA_PATH = Path(DATA_PATH, 'output')

CREDENTIALS_PATH = Path(RES_PATH, 'credentials.yml')
W2V_GOOGLENEWS_MODEL_PATH = Path(PRETRAINED_MODELS_PATH, 'GoogleNews-vectors-negative300.bin.gz')
Expand Down Expand Up @@ -48,35 +49,37 @@ def __reporthook(blocknum, blocksize, totalsize):
else: # total size is unknown
sys.stderr.write("read %d\n" % (readsofar,))

def pickle_graph(Graph, path):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), 'wb') as file:
pickle.dump(Graph, file)
print(f'Graph pickled successfully at: {path}.', '\n')

def load_pickled_graph(path):
with open(str(path), 'rb') as file:
Graph = pickle.load(file)
return Graph

def get_graph_name(suffix=''):
return f'{__get_time()}_{suffix}_graph.pickle'

def __get_time(format='%y%m%d-%H%M%S'):
return datetime.datetime.now().strftime(format)

def save_facts(facts, path):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), 'w') as file:
for fact in sorted(facts):
file.write(str(fact) + '\n')
print(f'Prolog facts saved successfully at: {path}.')

def get_facts_name(suffix='', graph_name=None, framework=''):
facts_name = f'{__get_time()}_{suffix}_{framework}_facts.pl'
if graph_name is not None:
graph_path = Path(graph_name)
facts_name = graph_path.name.replace('graph.pickle', f'{framework}_facts.pl')
return facts_name
# IO
# def pickle_graph(Graph, path):
#  path = Path(path)
#  path.parent.mkdir(parents=True, exist_ok=True)
#  with open(str(path), 'wb') as file:
#  pickle.dump(Graph, file)
#  print(f'Graph pickled successfully at: {path}.', '\n')
# def load_pickled_graph(path):
#  with open(str(path), 'rb') as file:
#  Graph = pickle.load(file)
#  return Graph
# def get_graph_name(suffix=''):
# return f'{__get_time()}_{suffix}_graph.pickle'
#
# def __get_time(format='%y%m%d-%H%M%S'):
# return datetime.datetime.now().strftime(format)
#
# def save_facts(facts, path):
# path = Path(path)
# path.parent.mkdir(parents=True, exist_ok=True)
# with open(str(path), 'w') as file:
# for fact in sorted(facts):
# file.write(str(fact) + '\n')
# print(f'Prolog facts saved successfully at: {path}.')
#
# def get_facts_name(suffix='', graph_name=None, framework=''):
# facts_name = f'{__get_time()}_{suffix}_{framework}_facts.pl'
# if graph_name is not None:
# graph_path = Path(graph_name)
# facts_name = graph_path.name.replace('graph.pickle', f'{framework}_facts.pl')
# return facts_name
Loading

0 comments on commit 3edf4d7

Please sign in to comment.