We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 01d4bf6 commit cc04c49Copy full SHA for cc04c49
SF_2011/Benetech/R/common_summaries.R
@@ -0,0 +1,7 @@
1
# Export the English-language summaries from `benetech` to
# "with-summaries.csv", one summary per line, so the words can be counted
# with shell tools.
# NOTE(review): `benetech` is not created in this script; it is assumed to be
# a data frame with `summary` and `language` columns -- confirm against the
# code that loads it.

# Keep only the records whose summary is non-empty.
with_summaries <- subset(benetech, nchar(summary) > 0)

# Of those, keep only the records tagged as English ("en").
with_english_summaries <- subset(with_summaries, language == "en")

# write.csv() ignores `col.names` (with a warning) and still emits a header
# line, which would then be counted as a word downstream. write.table()
# honours `col.names = FALSE`, so the file contains nothing but summaries.
write.table(
  with_english_summaries$summary,
  file = "with-summaries.csv",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# Then count the words in the shell. `uniq -c` prefixes every distinct word
# with its number of occurrences, which is the count-then-word line format
# that common_words.py splits apart:
#   tr " " "\n" < with-summaries.csv | sort | uniq -c
# Finally, feed that output to common_words.py.
SF_2011/Benetech/python/common_words.py
@@ -0,0 +1,23 @@
import numpy

# Rank words by how often they occur and show the 50 most frequent ones.
#
# NOTE(review): `body` is not defined in this snippet; it is assumed to be an
# iterable of text lines in `sort | uniq -c` format ("<count> <word>") --
# confirm where it is loaded.
word_to_count = {}
for line in body:
    fields = line.strip().split(" ")
    if not fields[0]:
        # Blank line: there is no count to record.
        continue
    # Convert the count to an integer so the ranking below is numeric.
    # Left as a string, "9" would sort after "1000". A non-numeric first
    # field raises ValueError instead of silently corrupting the ranking.
    count = int(fields[0])
    # A line holding only a count is the tally for the empty token; any line
    # that is not exactly "<count> <word>" is filed under "" as well.
    word = fields[1] if len(fields) == 2 else ""
    word_to_count[word] = count

# list() is required on Python 3, where keys()/values() return views that
# numpy would otherwise wrap as a single 0-d object array.
words = numpy.array(list(word_to_count.keys()))
counts = numpy.array(list(word_to_count.values()))

# argsort is ascending, so the last 50 entries are the most frequent words
# (least frequent of them first, most frequent last).
top_words = words[numpy.argsort(counts)][-50:]
print(top_words)

# Output recorded from the original run, which ordered the counts as strings
# rather than as numbers; re-run to refresh it:
# array(['formed', 'Thanpyuzayart', 'ya', 'fruit', '20', 'following',
#        'Division', 'Time)', 'what', 'Operation', '2007', 'endure', 'been',
#        'leader', 'most', 'demanded', 'participation', 'physical', 'arrest',
#        'should', 'tried', 'Kaw', 'when', 'as', 'Nam', '2011', 'taking',
#        'place', 'sent', 'Zaw', 'Tun', 'over', 'Namkham', 'Tin', 'health',
#        'outside', 'I', 'While', 'Moe', 'Lay', 'prisons', 'Three',
#        'further', 'reported', '(First', 'according', "don't", 'If', 'rice',
#        'and'],
#       dtype='|S78')
0 commit comments