
Commit

[WIP] train with other accounts and account to be tested
sebastiankliem committed Dec 13, 2016
1 parent 51eec9e commit 3907980
Showing 5 changed files with 30 additions and 12 deletions.
5 changes: 2 additions & 3 deletions cli.py
@@ -1,7 +1,6 @@
 import argparse
 
-from core import prepare_data
-from core import run_pipeline
+from core import prepare_data, run_pipeline
 from core.evaluation import writeToXlsx
 
 from crawler import crawl_status_updates
@@ -24,7 +23,7 @@ def analyze(data_source_type, classifier_type, experiments_count,
print("Run experiments...")
evaluation_data = []
for i in range(0, experiments_count):
tp, tn, fp, fn = run_pipeline(status_updates, classifier_type)
tp, tn, fp, fn = run_pipeline(data_source_type, status_updates, classifier_type, dataset_path = 'C:/Users/sebas/Downloads/twitter_popular_users_10.csv')
evaluation_data.append([i, tp, tn, fp, fn, (tp + tn) / (tp + tn + fp + fn), tp / (tp + fp), tp / (tp + fn)])

print("Evaluation results for experiment %i/%i" % (i + 1, experiments_count))
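For context, the columns appended per experiment are the raw confusion counts followed by accuracy, precision, and recall. A worked example with made-up counts:

    tp, tn, fp, fn = 40, 35, 10, 15
    accuracy = (tp + tn) / (tp + tn + fp + fn)  # 0.75
    precision = tp / (tp + fp)                  # 0.8
    recall = tp / (tp + fn)                     # ~0.727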
14 changes: 13 additions & 1 deletion core/__init__.py
@@ -20,8 +20,20 @@ def prepare_data(data_provider_type, **kwargs):

     return status_updates
 
+def run_pipeline(data_provider_type, status_updates, classifier_type, **kwargs):
+    base_size = 200
+    base_status_updates = get_status_updates(data_provider_type, **kwargs) + status_updates[:base_size]
+    base_features = [extract_features(tweet) for tweet in base_status_updates]
+    model = train_classifier(base_features, [False] * (len(base_status_updates) - base_size) + [True] * base_size, classifier_type)
+    predictions = []
+    for i in range(len(status_updates[20:])):
+        prediction = model.predict(extract_features(status_updates[i]))
+        predictions.append(prediction)
+    print(predictions)
+    return 0, 0, 0, 0
 
-def run_pipeline(status_updates, classifier_type):
+
+def run_pipeline_old(status_updates, classifier_type):
     # Split dataset into testing and training set (per author)
     train_status_updates = []
     test_status_updates = []
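At this point the new run_pipeline only prints its predictions and returns placeholder counts. A minimal sketch of how the prediction list could later be reduced to the (tp, tn, fp, fn) tuple that cli.py unpacks, assuming a parallel list of true labels were available (the true_labels argument is an assumption, not produced by this commit):

    def confusion_counts(predictions, true_labels):
        # Tally boolean predictions against known labels.
        tp = sum(1 for p, t in zip(predictions, true_labels) if p and t)
        tn = sum(1 for p, t in zip(predictions, true_labels) if not p and not t)
        fp = sum(1 for p, t in zip(predictions, true_labels) if p and not t)
        fn = sum(1 for p, t in zip(predictions, true_labels) if not p and t)
        return tp, tn, fp, fn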
10 changes: 5 additions & 5 deletions core/training/__init__.py
@@ -1,16 +1,16 @@
 from .decision_tree import train_classifier as train_decision_tree
 
+from .perceptron import train_classifier as train_perceptron
 
 type_classifier_mapping = {
-    'decision_tree': train_decision_tree
+    'decision_tree': train_decision_tree,
+    'perceptron': train_perceptron
 }
 
 
-def train_classifier(samples, labels, classifier_type):
+def train_classifier(samples, labels, classifier_type, **kwargs):
     if len(samples) != len(labels):
         raise ValueError('Number of samples has to equal number of labels!')
     if classifier_type not in type_classifier_mapping:
         raise ValueError('Invalid classifier_type!')
 
     training_callable = type_classifier_mapping[classifier_type]
-    return training_callable(samples, labels)
+    return training_callable(samples, labels, **kwargs)
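With the mapping extended and **kwargs forwarded, callers pick the algorithm by name and can pass backend-specific options through, e.g. an already trained perceptron to update incrementally. A usage sketch with made-up feature vectors:

    from core.training import train_classifier

    samples = [[0.1, 3, 7], [0.9, 1, 2]]
    labels = [False, True]

    tree_model = train_classifier(samples, labels, 'decision_tree')
    perceptron_model = train_classifier(samples, labels, 'perceptron')
    # Continue training the same perceptron on a second batch:
    perceptron_model = train_classifier([[0.5, 2, 4], [0.2, 8, 1]], [True, False],
                                        'perceptron', classifier=perceptron_model)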
6 changes: 3 additions & 3 deletions core/training/decision_tree.py
@@ -1,8 +1,8 @@
-from sklearn import linear_model
+from sklearn import tree
 
 
 def train_classifier(samples, labels):
-    classifier = linear_model.Perceptron()
-    classifier = classifier.partial_fit(samples, labels, classes=labels)
+    classifier = tree.DecisionTreeClassifier()
+    classifier = classifier.fit(samples, labels)
 
     return classifier
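decision_tree.py goes back to a plain batch-trained tree; scikit-learn's DecisionTreeClassifier has no partial_fit, so incremental updates live in the new perceptron module instead. A quick usage sketch with made-up data:

    from sklearn import tree

    samples = [[0, 1], [1, 0], [1, 1]]
    labels = [False, True, True]

    classifier = tree.DecisionTreeClassifier().fit(samples, labels)
    print(classifier.predict([[0, 1]]))  # [False]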
7 changes: 7 additions & 0 deletions core/training/perceptron.py
@@ -0,0 +1,7 @@
+from sklearn import linear_model
+
+def train_classifier(samples, labels, classifier = None):
+    classifier = classifier or linear_model.Perceptron()
+    classifier = classifier.partial_fit(samples, labels, classes=labels)
+
+    return classifier
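The new module wraps scikit-learn's partial_fit, so a previously returned classifier can be passed back in and updated on further batches rather than retrained from scratch. Note that because classes=labels is passed on every call, each batch effectively has to contain every class the model distinguishes. A usage sketch with made-up batches:

    from core.training.perceptron import train_classifier

    # First batch; both classes must be present since classes=labels is used.
    model = train_classifier([[0, 1], [1, 0]], [False, True])

    # Second batch: reuse the existing model instead of creating a new one.
    model = train_classifier([[1, 1], [0, 0]], [True, False], classifier=model)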
