
Commit

[WIP] train with other accounts and account to be tested
sebastiankliem committed Dec 13, 2016
1 parent 51eec9e commit 3907980
Showing 5 changed files with 30 additions and 12 deletions.
5 changes: 2 additions & 3 deletions cli.py
@@ -1,7 +1,6 @@
 import argparse
 
-from core import prepare_data
-from core import run_pipeline
+from core import prepare_data, run_pipeline
 from core.evaluation import writeToXlsx
 
 from crawler import crawl_status_updates
@@ -24,7 +23,7 @@ def analyze(data_source_type, classifier_type, experiments_count,
print("Run experiments...")
evaluation_data = []
for i in range(0, experiments_count):
tp, tn, fp, fn = run_pipeline(status_updates, classifier_type)
tp, tn, fp, fn = run_pipeline(data_source_type, status_updates, classifier_type, dataset_path = 'C:/Users/sebas/Downloads/twitter_popular_users_10.csv')
evaluation_data.append([i, tp, tn, fp, fn, (tp + tn) / (tp + tn + fp + fn), tp / (tp + fp), tp / (tp + fn)])

print("Evaluation results for experiment %i/%i" % (i + 1, experiments_count))
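For context, the columns appended per experiment are the raw confusion counts followed by accuracy, precision, and recall. A worked example with made-up counts:

    tp, tn, fp, fn = 40, 35, 10, 15
    accuracy = (tp + tn) / (tp + tn + fp + fn)  # 0.75
    precision = tp / (tp + fp)                  # 0.8
    recall = tp / (tp + fn)                     # ~0.727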
14 changes: 13 additions & 1 deletion core/__init__.py
@@ -20,8 +20,20 @@ def prepare_data(data_provider_type, **kwargs):

     return status_updates
 
+def run_pipeline(data_provider_type, status_updates, classifier_type, **kwargs):
+    base_size = 200
+    base_status_updates = get_status_updates(data_provider_type, **kwargs) + status_updates[:base_size]
+    base_features = [extract_features(tweet) for tweet in base_status_updates]
+    model = train_classifier(base_features, [False] * (len(base_status_updates) - base_size) + [True] * base_size, classifier_type)
+    predictions = []
+    for i in range(len(status_updates[20:])):
+        prediction = model.predict(extract_features(status_updates[i]))
+        predictions.append(prediction)
+    print(predictions)
+    return 0, 0, 0, 0
 
-def run_pipeline(status_updates, classifier_type):
+
+def run_pipeline_old(status_updates, classifier_type):
     # Split dataset into testing and training set (per author)
     train_status_updates = []
     test_status_updates = []
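At this point the new run_pipeline only prints its predictions and returns placeholder counts. A minimal sketch of how the prediction list could later be reduced to the (tp, tn, fp, fn) tuple that cli.py unpacks, assuming a parallel list of true labels were available (the true_labels argument is an assumption, not produced by this commit):

    def confusion_counts(predictions, true_labels):
        # Tally boolean predictions against known labels.
        tp = sum(1 for p, t in zip(predictions, true_labels) if p and t)
        tn = sum(1 for p, t in zip(predictions, true_labels) if not p and not t)
        fp = sum(1 for p, t in zip(predictions, true_labels) if p and not t)
        fn = sum(1 for p, t in zip(predictions, true_labels) if not p and t)
        return tp, tn, fp, fn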
10 changes: 5 additions & 5 deletions core/training/__init__.py
@@ -1,16 +1,16 @@
 from .decision_tree import train_classifier as train_decision_tree
 
+from .perceptron import train_classifier as train_perceptron
 
 type_classifier_mapping = {
-    'decision_tree': train_decision_tree
+    'decision_tree': train_decision_tree,
+    'perceptron': train_perceptron
 }
 
 
-def train_classifier(samples, labels, classifier_type):
+def train_classifier(samples, labels, classifier_type, **kwargs):
     if len(samples) != len(labels):
         raise ValueError('Number of samples has to equal number of labels!')
     if classifier_type not in type_classifier_mapping:
         raise ValueError('Invalid classifier_type!')
 
     training_callable = type_classifier_mapping[classifier_type]
-    return training_callable(samples, labels)
+    return training_callable(samples, labels, **kwargs)
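With the mapping extended and **kwargs forwarded, callers pick the algorithm by name and can pass backend-specific options through, e.g. an already trained perceptron to update incrementally. A usage sketch with made-up feature vectors:

    from core.training import train_classifier

    samples = [[0.1, 3, 7], [0.9, 1, 2]]
    labels = [False, True]

    tree_model = train_classifier(samples, labels, 'decision_tree')
    perceptron_model = train_classifier(samples, labels, 'perceptron')
    # Continue training the same perceptron on a second batch:
    perceptron_model = train_classifier([[0.5, 2, 4], [0.2, 8, 1]], [True, False],
                                        'perceptron', classifier=perceptron_model)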
6 changes: 3 additions & 3 deletions core/training/decision_tree.py
@@ -1,8 +1,8 @@
-from sklearn import linear_model
+from sklearn import tree
 
 
 def train_classifier(samples, labels):
-    classifier = linear_model.Perceptron()
-    classifier = classifier.partial_fit(samples, labels, classes=labels)
+    classifier = tree.DecisionTreeClassifier()
+    classifier = classifier.fit(samples, labels)
 
     return classifier
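decision_tree.py goes back to a plain batch-trained tree; scikit-learn's DecisionTreeClassifier has no partial_fit, so incremental updates live in the new perceptron module instead. A quick usage sketch with made-up data:

    from sklearn import tree

    samples = [[0, 1], [1, 0], [1, 1]]
    labels = [False, True, True]

    classifier = tree.DecisionTreeClassifier().fit(samples, labels)
    print(classifier.predict([[0, 1]]))  # [False]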
7 changes: 7 additions & 0 deletions core/training/perceptron.py
@@ -0,0 +1,7 @@
+from sklearn import linear_model
+
+def train_classifier(samples, labels, classifier = None):
+    classifier = classifier or linear_model.Perceptron()
+    classifier = classifier.partial_fit(samples, labels, classes=labels)
+
+    return classifier
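The new module wraps scikit-learn's partial_fit, so a previously returned classifier can be passed back in and updated on further batches rather than retrained from scratch. Note that because classes=labels is passed on every call, each batch effectively has to contain every class the model distinguishes. A usage sketch with made-up batches:

    from core.training.perceptron import train_classifier

    # First batch; both classes must be present since classes=labels is used.
    model = train_classifier([[0, 1], [1, 0]], [False, True])

    # Second batch: reuse the existing model instead of creating a new one.
    model = train_classifier([[1, 1], [0, 0]], [True, False], classifier=model)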
