Skip to content

Commit

Permalink
working on #19
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisPiemonte committed Feb 21, 2019
1 parent d68f156 commit 3edf4d7
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/
*$py.class

.ipynb_checkpoints
.idea/
*.pyc
*_.ipynb
.DS_Store
Expand Down
40 changes: 40 additions & 0 deletions src/argonaut/argumentation/convert/to_lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils


def user_nodes_to_lines(Graph, sep=','):
    """Serialize every node of a user graph to one CSV-style line.

    Each node's 'text' attribute is expected to be an iterable of strings
    (the user-mode miners store a set of comment texts per user); it is
    flattened with list_to_string before formatting.
    NOTE(review): if 'text' is a set, line content order is nondeterministic.

    :param Graph: networkx graph whose nodes carry a 'text' attribute
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per node
    """
    lines = []
    for node_id in Graph.nodes:
        text = list_to_string(Graph.node[node_id].get('text', ''))
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(user_node_to_text(node_id, text, sep=sep))
    return lines

def comment_nodes_to_lines(Graph, sep=','):
    """Serialize every node of a comment graph to one CSV-style line.

    Each node is expected to carry 'text' and 'user' attributes (set by the
    comment-mode miners); missing attributes default to the empty string.

    :param Graph: networkx graph whose nodes carry 'text'/'user' attributes
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per node
    """
    lines = []
    for node_id in Graph.nodes:
        text = Graph.node[node_id].get('text', '')
        user = Graph.node[node_id].get('user', '')
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(comment_node_to_text(node_id, text, user, sep=sep))
    return lines

def edges_to_lines(Graph, sep=','):
    """Serialize every edge of the graph to one CSV-style line.

    Edges without a 'weight' attribute get an empty weight field so the
    column count stays constant across lines.

    :param Graph: networkx graph; edges may carry a 'weight' attribute
    :param sep: field separator for the output line (default ',')
    :return: list of strings, one per edge
    """
    lines = []
    for source, dest, data in Graph.edges(data=True):
        weight = str(data['weight']) if 'weight' in data else ''
        # BUGFIX: was `lines += ...`, which extended the list with the
        # individual characters of the returned string; append the whole line.
        lines.append(edge_to_text(source, dest, weight, sep=sep))
    return lines



def user_node_to_text(node_id, text, sep=','):
    """Format a user node as a single separated line: ``<node_id><sep><text>``."""
    fields = (node_id, text)
    return sep.join(f'{field}' for field in fields)

def comment_node_to_text(node_id, text, user, sep=','):
    """Format a comment node as ``<node_id><sep><text><sep><user>``."""
    fields = (node_id, text, user)
    return sep.join(f'{field}' for field in fields)

def edge_to_text(source, dest, weight, sep=','):
    """Format an edge as ``<source><sep><dest><sep><weight>``."""
    fields = (source, dest, weight)
    return sep.join(f'{field}' for field in fields)


def list_to_string(l, sep=' --- '):
    """Join the string elements of *l* into one string, separated by *sep*."""
    joined = sep.join(l)
    return joined
1 change: 1 addition & 0 deletions src/argonaut/argumentation/convert/to_prolog.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.convert import common
from argonaut.argumentation.convert.frameworks import bwaf, waf, baf, af
Expand Down
12 changes: 1 addition & 11 deletions src/argonaut/argumentation/mine/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
import matplotlib.pyplot as plt
import argonaut.text.TextAnalyzer
import argonaut.utils.io as io
from argonaut.utils.twitter_utils import *
import argonaut.utils.common_utils as utils
from argonaut.argumentation.convert import common
Expand Down Expand Up @@ -50,17 +51,6 @@ def merge_multiedges(MultiDiGraph):
Graph[u][v]['weight'] /= Graph[u][v]['num']
return Graph

def save_graph(Graph, suffix, path=None, framework=common.BWAF, n_decimal=2, verbose=False):
graph_name = utils.get_graph_name(suffix=suffix)
graph_output_path = Path(utils.INTERIM_DATA_PATH, graph_name) if path is None else path + '_graph.pickle'
utils.pickle_graph(Graph, graph_output_path)
# SAVE PROLOG FACTS
facts = to_prolog.to_facts(Graph, framework=framework, n_decimal=n_decimal, verbose=verbose)
facts_name = utils.get_facts_name(graph_name=graph_name, framework=framework)
facts_output_path = Path(utils.PROLOG_DATA_PATH, facts_name) if path is None else path + '_facts.pl'
utils.save_facts(facts, facts_output_path)
print('Everything saved successfully.', '\n')

def count_nodes(Graph):
return len(set(Graph.nodes()))

Expand Down
18 changes: 16 additions & 2 deletions src/argonaut/argumentation/mine/from_reddit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys, os, praw
import networkx as nx
from bs4 import BeautifulSoup
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.mine.common import *
import argonaut.text.TextAnalyzer as TextAnalyzer
Expand Down Expand Up @@ -30,7 +31,7 @@ def get_debate_graph(submissionId=None, mode='comments', save=True, path=None,
Graph = merge_multiedges(Graph)
if save:
suffix = f'reddit_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand All @@ -55,6 +56,10 @@ def __build_graph_from_comments(comments):
parent_sentiment = TextAnalyzer.get_sentiment(comment.parent_text)
similarity = TextAnalyzer.get_similarity(comment.text, comment.parent_text)
weight = get_edge_weight(similarity, comment_sentiment, parent_sentiment)
# ADD NODES ATTRIBUTES
Graph.add_node(comment.id, text=comment.text, user=comment.user)
Graph.add_node(comment.parent, text=comment.parent_text, user=comment.parent_user)
# ADD EDGE
Graph.add_edge(comment.id, comment.parent, weight=weight)
else:
pass
Expand All @@ -68,7 +73,16 @@ def __build_graph_from_users(comments):
parent_sentiment = TextAnalyzer.get_sentiment(comment.parent_text)
similarity = TextAnalyzer.get_similarity(comment.text, comment.parent_text)
weight = get_edge_weight(similarity, comment_sentiment, parent_sentiment)
# TODO merge edges
# ADD NODES ATTRIBUTES
if comment.user in Graph.node:
Graph.node[comment.user]['text'].add(comment.text)
else:
Graph.add_node(comment.user, text={comment.text})
if comment.parent_user in Graph.node:
Graph.node[comment.parent_user]['text'].add(comment.parent_text)
else:
Graph.add_node(comment.parent_user, text={comment.parent_text})
# ADD EDGE
Graph.add_edge(comment.user, comment.parent_user, weight=weight)
else:
pass
Expand Down
26 changes: 23 additions & 3 deletions src/argonaut/argumentation/mine/from_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import networkx as nx
from random import randint
from stackapi import StackAPI
import argonaut.utils.io as io
import argonaut.utils.common_utils as utils
from argonaut.argumentation.mine.common import *
import argonaut.utils.stack_utils as stack_utils
Expand All @@ -27,7 +28,7 @@ def get_debate_graph(question=None, mode='comments', save=True, path=None,
Graph = merge_multiedges(Graph)
if save:
suffix = f'stack_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand All @@ -44,7 +45,7 @@ def __build_graph_from_comments(questions):
for question in questions:
question_id = stack_utils.get_question_id(question)
question_sentiment = TextAnalyzer.get_sentiment(stack_utils.get_text(question))
Graph.add_node(question_id)
Graph.add_node(question_id, text=stack_utils.get_text(question), user=stack_utils.get_user_id(question))
answers = get_answers(question_id, site=site)

for answer in answers['items']:
Expand All @@ -53,6 +54,8 @@ def __build_graph_from_comments(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(question), stack_utils.get_text(answer))
# compute the weight of the edge
weight = get_edge_weight(similarity, answer_sentiment, question_sentiment)

Graph.add_node(answer_id, text=stack_utils.get_text(answer), user=stack_utils.get_user_id(answer))
Graph.add_edge(answer_id, question_id, weight=weight)
comments = get_comments(answer_id, site=site)

Expand All @@ -62,6 +65,8 @@ def __build_graph_from_comments(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(answer), stack_utils.get_text(comment))
# compute the weight of the edge
weight = get_edge_weight(similarity, comment_sentiment, answer_sentiment)

Graph.add_node(comment_id, text=stack_utils.get_text(comment), user=stack_utils.get_user_id(comment))
Graph.add_edge(comment_id, answer_id, weight=weight)
return Graph

Expand All @@ -76,7 +81,12 @@ def __build_graph_from_users(questions):
question_id = stack_utils.get_question_id(question)
question_user_id = stack_utils.get_user_id(question)
question_sentiment = TextAnalyzer.get_sentiment(stack_utils.get_text(question))
Graph.add_node(question_user_id)

if question_user_id in Graph.node:
Graph.node[question_user_id]['text'].add(stack_utils.get_text(question))
else:
Graph.add_node(question_user_id, text={stack_utils.get_text(question)})
# Graph.add_node(question_user_id)
answers = get_answers(question_id, site=site)

for answer in answers['items']:
Expand All @@ -86,6 +96,11 @@ def __build_graph_from_users(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(answer), stack_utils.get_text(question))
# compute the weight of the edge
weight = get_edge_weight(similarity, answer_sentiment, question_sentiment)

if answer_user_id in Graph.node:
Graph.node[answer_user_id]['text'].add(stack_utils.get_text(answer))
else:
Graph.add_node(answer_user_id, text={stack_utils.get_text(answer)})
Graph.add_edge(answer_user_id, question_user_id, weight=weight)
comments = get_comments(answer_id, site=site)

Expand All @@ -96,6 +111,11 @@ def __build_graph_from_users(questions):
similarity = TextAnalyzer.get_similarity(stack_utils.get_text(comment), stack_utils.get_text(answer))
# compute the weight of the edge
weight = get_edge_weight(similarity, comment_sentiment, answer_sentiment)

if tweet.user in Graph.node:
Graph.node[tweet.user]['text'].add(tweet.text)
else:
Graph.add_node(tweet.user, text={tweet.text})
Graph.add_edge(comment_user_id, answer_user_id, weight=weight)
return Graph

Expand Down
22 changes: 18 additions & 4 deletions src/argonaut/argumentation/mine/from_twitter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tweepy
import networkx as nx
import argonaut.utils.io as io
from functools import lru_cache
from argonaut.utils.twitter_utils import *
import argonaut.utils.common_utils as utils
Expand Down Expand Up @@ -37,7 +38,7 @@ def get_debate_graph(query='trump', language='en', mode='comments', save=True, p
Graph = merge_multiedges(Graph)
if save:
suffix = f'twitter_{mode}'
save_graph(Graph, suffix, path=path, framework=framework, n_decimal=n_decimal, verbose=verbose)
io.save_graph(Graph, suffix, path=path, mode=mode, framework=framework, n_decimal=n_decimal, verbose=verbose)
if verbose:
print(f'NUMBER OF NODES IN THE GRAPH: {count_nodes(Graph)}')
print(f'NUMBER OF EDGES IN THE GRAPH: {count_edges(Graph)}')
Expand Down Expand Up @@ -69,11 +70,15 @@ def __build_graph_from_comments(conversations):
for i, tweet in enumerate(conv):
if tweet.parent is not None:
answered_tweet = conv[i+1]
# COMPUTE THE WEIGHT
similarity = TextAnalyzer.get_similarity(tweet.text, answered_tweet.text)
tweet_sentiment = TextAnalyzer.get_sentiment(tweet.text)
answered_tweet_sentiment = TextAnalyzer.get_sentiment(answered_tweet.text)
# or if the edge already exist change the weight
weight = get_edge_weight(similarity, tweet_sentiment, answered_tweet_sentiment)
# ADD NODES ATTRIBUTES
Graph.add_node(tweet.id, text=tweet.text, user=tweet.user)
Graph.add_node(answered_tweet.id, text=answered_tweet.text, user=answered_tweet.user)
# ADD EDGE
Graph.add_edge(tweet.id, answered_tweet.id, weight=weight)
else:
pass
Expand All @@ -86,12 +91,21 @@ def __build_graph_from_users(conversations):
for i, tweet in enumerate(conv):
if tweet.parent is not None:
answered_tweet = conv[i+1]
# COMPUTE THE WEIGHT
similarity = TextAnalyzer.get_similarity(tweet.text, answered_tweet.text)
tweet_sentiment = TextAnalyzer.get_sentiment(tweet.text)
answered_tweet_sentiment = TextAnalyzer.get_sentiment(answered_tweet.text)
# TODO: or if the edge already exist change the weight
weight = get_edge_weight(similarity, tweet_sentiment, answered_tweet_sentiment)
# TODO: when multiple edges find a way to merge them
# ADD NODES ATTRIBUTES
if tweet.user in Graph.node:
Graph.node[tweet.user]['text'].add(tweet.text)
else:
Graph.add_node(tweet.user, text={tweet.text})
if answered_tweet.user in Graph.node:
Graph.node[answered_tweet.user]['text'].add(answered_tweet.text)
else:
Graph.add_node(answered_tweet.user, text={answered_tweet.text})
# ADD EDGE
Graph.add_edge(tweet.user, answered_tweet.user, weight=weight)
else:
pass
Expand Down
65 changes: 34 additions & 31 deletions src/argonaut/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
PRETRAINED_MODELS_PATH = Path(MODELS_PATH, 'pretrained')
INTERIM_DATA_PATH = Path(DATA_PATH, 'interim')
PROLOG_DATA_PATH = Path(DATA_PATH, 'prolog')
OUTPUT_DATA_PATH = Path(DATA_PATH, 'output')

CREDENTIALS_PATH = Path(RES_PATH, 'credentials.yml')
W2V_GOOGLENEWS_MODEL_PATH = Path(PRETRAINED_MODELS_PATH, 'GoogleNews-vectors-negative300.bin.gz')
Expand Down Expand Up @@ -48,35 +49,37 @@ def __reporthook(blocknum, blocksize, totalsize):
else: # total size is unknown
sys.stderr.write("read %d\n" % (readsofar,))

def pickle_graph(Graph, path):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), 'wb') as file:
pickle.dump(Graph, file)
print(f'Graph pickled successfully at: {path}.', '\n')

def load_pickled_graph(path):
with open(str(path), 'rb') as file:
Graph = pickle.load(file)
return Graph

def get_graph_name(suffix=''):
return f'{__get_time()}_{suffix}_graph.pickle'

def __get_time(format='%y%m%d-%H%M%S'):
return datetime.datetime.now().strftime(format)

def save_facts(facts, path):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), 'w') as file:
for fact in sorted(facts):
file.write(str(fact) + '\n')
print(f'Prolog facts saved successfully at: {path}.')

def get_facts_name(suffix='', graph_name=None, framework=''):
facts_name = f'{__get_time()}_{suffix}_{framework}_facts.pl'
if graph_name is not None:
graph_path = Path(graph_name)
facts_name = graph_path.name.replace('graph.pickle', f'{framework}_facts.pl')
return facts_name
# IO
# def pickle_graph(Graph, path):
#  path = Path(path)
#  path.parent.mkdir(parents=True, exist_ok=True)
#  with open(str(path), 'wb') as file:
#  pickle.dump(Graph, file)
#  print(f'Graph pickled successfully at: {path}.', '\n')
# def load_pickled_graph(path):
#  with open(str(path), 'rb') as file:
#  Graph = pickle.load(file)
#  return Graph
# def get_graph_name(suffix=''):
# return f'{__get_time()}_{suffix}_graph.pickle'
#
# def __get_time(format='%y%m%d-%H%M%S'):
# return datetime.datetime.now().strftime(format)
#
# def save_facts(facts, path):
# path = Path(path)
# path.parent.mkdir(parents=True, exist_ok=True)
# with open(str(path), 'w') as file:
# for fact in sorted(facts):
# file.write(str(fact) + '\n')
# print(f'Prolog facts saved successfully at: {path}.')
#
# def get_facts_name(suffix='', graph_name=None, framework=''):
# facts_name = f'{__get_time()}_{suffix}_{framework}_facts.pl'
# if graph_name is not None:
# graph_path = Path(graph_name)
# facts_name = graph_path.name.replace('graph.pickle', f'{framework}_facts.pl')
# return facts_name
Loading

0 comments on commit 3edf4d7

Please sign in to comment.