Skip to content

Commit

Permalink
Remove Tweet utils. (#323)
Browse files Browse the repository at this point in the history
- Resolves #322
- Resolves #206
- Resolves #194
  • Loading branch information
ruebot authored and ianmilligan1 committed Jul 15, 2019
1 parent 4ce59c8 commit 20ffeeb
Show file tree
Hide file tree
Showing 10 changed files with 0 additions and 393 deletions.
21 changes: 0 additions & 21 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -475,16 +475,6 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>chill_2.11</artifactId>
<version>0.8.4</version>
</dependency>
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-jackson_2.11</artifactId>
<version>3.2.11</version>
</dependency>
<dependency>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-parser-combinators_2.11</artifactId>
Expand All @@ -505,17 +495,6 @@
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_2.11</artifactId>
<version>0.13.1</version>
<exclusions>
<exclusion>
<groupId>net.sourceforge.f2j</groupId>
<artifactId>arpack_combined_all</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
Expand Down
27 changes: 0 additions & 27 deletions src/main/scala/io/archivesunleashed/matchbox/ExtractHashtags.scala

This file was deleted.

14 changes: 0 additions & 14 deletions src/main/scala/io/archivesunleashed/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ import org.apache.hadoop.fs.{FileSystem, Path}
import io.archivesunleashed.matchbox.ExtractDate.DateComponent._
import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.json4s._
import org.json4s.jackson.JsonMethods._
// scalastyle:on underscore.import
import org.apache.hadoop.io.LongWritable
import org.apache.spark.{SerializableWritable, SparkContext}
Expand Down Expand Up @@ -68,18 +66,6 @@ package object archivesunleashed {
((r._2.getFormat == ArchiveFormat.WARC) && r._2.getRecord.getHeader.getHeaderValue("WARC-Type").equals("response")))
.map(r => new ArchiveRecordImpl(new SerializableWritable(r._2)))
}

/** Loads tweets from a text file into an RDD of parsed JSON values.
  *
  * The file is read line by line. Lines that start with `{"delete":` are
  * skipped, and any line that cannot be parsed as JSON is dropped instead of
  * failing the whole job.
  *
  * @param path the path to the Tweets file
  * @param sc the apache spark context
  * @return an RDD of JValue (json objects) for mapping.
  */
def loadTweets(path: String, sc: SparkContext): RDD[JValue] =
  // Try(...).toOption replaces the former null sentinel: a failed parse
  // becomes None and flatMap discards it, so no null ever enters the RDD.
  // Try only captures non-fatal throwables, so fatal errors and thread
  // interruption propagate instead of being swallowed.
  sc.textFile(path)
    .filter(line => !line.startsWith("{\"delete\":"))
    .flatMap(line => scala.util.Try(parse(line)).toOption)
}

/** A Wrapper class around RDD to simplify counting. */
Expand Down
78 changes: 0 additions & 78 deletions src/main/scala/io/archivesunleashed/util/TweetUtils.scala

This file was deleted.

1 change: 0 additions & 1 deletion src/test/resources/arc/delTweetsTest.json

This file was deleted.

18 changes: 0 additions & 18 deletions src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@
package io.archivesunleashed

import com.google.common.io.Resources
// scalastyle:off underscore.import
import io.archivesunleashed.util.TweetUtils._
// scalastyle:on underscore.import
import org.apache.spark.{SparkConf, SparkContext}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
Expand All @@ -29,8 +26,6 @@ import org.scalatest.{BeforeAndAfter, FunSuite}
@RunWith(classOf[JUnitRunner])
class RecordLoaderTest extends FunSuite with BeforeAndAfter {
private val warcPath = Resources.getResource("warc/example.warc.gz").getPath
private val tweetPath = Resources.getResource("arc/tweetsTest.json").getPath
private val delTweetPath = Resources.getResource("arc/delTweetsTest.json").getPath
private val master = "local[4]"
private val appName = "example-spark"
private var sc: SparkContext = _
Expand All @@ -51,19 +46,6 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter {
assert(base(0) == "http://www.archive.org/")
}

test("loads Tweets") {
  // Load the sample tweet file and keep only the text of each tweet.
  val tweetTexts = RecordLoader.loadTweets(tweetPath, sc).map(_.text()).collect()
  assert(tweetTexts(0) == "some text")
  assert(tweetTexts(1) == "some more text")
}

test("does not load deleted") {
  // The fixture at delTweetPath is expected to yield no records at all.
  val loaded = RecordLoader.loadTweets(delTweetPath, sc).collect()
  assert(loaded.isEmpty)
}

after {
if (sc != null) {
sc.stop()
Expand Down

This file was deleted.

This file was deleted.

35 changes: 0 additions & 35 deletions src/test/scala/io/archivesunleashed/matchbox/ExtractUrlsTest.scala

This file was deleted.

Loading

0 comments on commit 20ffeeb

Please sign in to comment.