-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcli.py
71 lines (57 loc) · 3.1 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import argparse
from core import prepare_data
from core import run_pipeline
from core.evaluation import writeToXlsx
from crawler import crawl_status_updates
def analyze(data_source_type, classifier_type, experiments_count,
dataset_path=None, twitter_user=None):
# Get status updates and prepare data
print("Retrieve and prepare data...")
provider_parameter = {}
if dataset_path is not None:
provider_parameter["dataset_path"] = dataset_path
elif data_source_type == "twitter" and twitter_user is not None:
provider_parameter["user_id"] = twitter_user
else:
raise ValueError("Either dataset_path or twitter_user has to be provided.")
status_updates = prepare_data(data_source_type, **provider_parameter)
# Run specified number of experiments
print("Run experiments...")
evaluation_data = []
for i in range(0, experiments_count):
tp, tn, fp, fn = run_pipeline(status_updates, classifier_type)
evaluation_data.append([i, tp, tn, fp, fn, (tp + tn) / (tp + tn + fp + fn), tp / (tp + fp), tp / (tp + fn)])
print("Evaluation results for experiment %i/%i" % (i + 1, experiments_count))
print("True positives: " + str(tp))
print("True negatives: " + str(tn))
print("False positives: " + str(fp))
print("False negatives: " + str(fn))
writeToXlsx(evaluation_data, experiments_count)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="This application analyzes social media status updates in order to determine, whether an account was compromised or not.")
parser.add_argument("--action", "-a",
help="The action that should be performed. Possible values are 'crawl' and 'analyze'.")
parser.add_argument("--data-source-type", "-t", default=None,
help="The type of the specified data source. Possible values are 'fth', 'mp' and 'twitter'.")
# Data source arguments
parser.add_argument("--dataset-path", "-p", default=None,
help="The path of the dataset, which contains the status_updates. ")
parser.add_argument("--twitter-user", "-u", default=None,
help="The id of the twitter user, whose status updates should be analyzed.")
# Crawl arguments
parser.add_argument("--user-limit", type=int, default=100,
help="The maximum number of accounts to crawl.")
# Train arguments
parser.add_argument("--classifier-type", "-c", default=None,
help="The type of the classifier to be trained. ")
parser.add_argument("--experiments-count", "-n", type=int, default=10,
help="The number of experiments to run.")
args = parser.parse_args()
if args.action == 'crawl':
crawl_status_updates(args.data_source_type, args.dataset_path,
user_limit=args.user_limit)
elif args.action == 'analyze':
analyze(args.data_source_type, args.classifier_type,
args.experiments_count, args.dataset_path, args.twitter_user)
else:
print("Invalid mode!")