Skip to content

Commit 6010740

Browse files
author
Thomas Risberg
committed
Adding plain Hadoop map-reduce example
1 parent 36fc818 commit 6010740

File tree

11 files changed

+245
-0
lines changed

11 files changed

+245
-0
lines changed

tweet-counts-hadoop/.classpath

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" output="target/classes" path="src/main/java">
4+
<attributes>
5+
<attribute name="optional" value="true"/>
6+
<attribute name="maven.pomderived" value="true"/>
7+
</attributes>
8+
</classpathentry>
9+
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
10+
<attributes>
11+
<attribute name="maven.pomderived" value="true"/>
12+
</attributes>
13+
</classpathentry>
14+
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15+
<attributes>
16+
<attribute name="optional" value="true"/>
17+
<attribute name="maven.pomderived" value="true"/>
18+
</attributes>
19+
</classpathentry>
20+
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
21+
<attributes>
22+
<attribute name="maven.pomderived" value="true"/>
23+
</attributes>
24+
</classpathentry>
25+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
26+
<attributes>
27+
<attribute name="maven.pomderived" value="true"/>
28+
</attributes>
29+
</classpathentry>
30+
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
31+
<attributes>
32+
<attribute name="maven.pomderived" value="true"/>
33+
</attributes>
34+
</classpathentry>
35+
<classpathentry kind="output" path="target/classes"/>
36+
</classpath>

tweet-counts-hadoop/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/target

tweet-counts-hadoop/.project

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>tweet-counts-hadoop</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.jdt.core.javabuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
<buildCommand>
14+
<name>org.eclipse.m2e.core.maven2Builder</name>
15+
<arguments>
16+
</arguments>
17+
</buildCommand>
18+
</buildSpec>
19+
<natures>
20+
<nature>org.eclipse.jdt.core.javanature</nature>
21+
<nature>org.eclipse.m2e.core.maven2Nature</nature>
22+
</natures>
23+
</projectDescription>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
eclipse.preferences.version=1
2+
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3+
org.eclipse.jdt.core.compiler.compliance=1.5
4+
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5+
org.eclipse.jdt.core.compiler.source=1.5
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
activeProfiles=
2+
eclipse.preferences.version=1
3+
resolveWorkspaceProjects=true
4+
version=1

tweet-counts-hadoop/pom.xml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
5+
<modelVersion>4.0.0</modelVersion>
6+
<groupId>com.springdeveloper.hadoop</groupId>
7+
<artifactId>tweet-counts-hadoop</artifactId>
8+
<version>0.0.1.BUILD-SNAPSHOT</version>
9+
<packaging>jar</packaging>
10+
<name>Tweet Counts</name>
11+
<url>http://www.springdeveloper.com</url>
12+
<description>Simple map-reduce program</description>
13+
<properties>
14+
<maven.test.failure.ignore>true</maven.test.failure.ignore>
15+
<hadoop.version>2.0.5-alpha</hadoop.version>
16+
</properties>
17+
<dependencies>
18+
<dependency>
19+
<groupId>org.apache.hadoop</groupId>
20+
<artifactId>hadoop-common</artifactId>
21+
<version>${hadoop.version}</version>
22+
</dependency>
23+
<dependency>
24+
<groupId>org.apache.hadoop</groupId>
25+
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
26+
<version>${hadoop.version}</version>
27+
</dependency>
28+
<dependency>
29+
<groupId>log4j</groupId>
30+
<artifactId>log4j</artifactId>
31+
<version>1.2.14</version>
32+
</dependency>
33+
<dependency>
34+
<groupId>junit</groupId>
35+
<artifactId>junit</artifactId>
36+
<version>4.7</version>
37+
<scope>test</scope>
38+
</dependency>
39+
</dependencies>
40+
41+
<build>
42+
<plugins>
43+
<plugin>
44+
<groupId>org.apache.maven.plugins</groupId>
45+
<artifactId>maven-compiler-plugin</artifactId>
46+
<configuration>
47+
<source>1.6</source>
48+
<target>1.6</target>
49+
</configuration>
50+
</plugin>
51+
</plugins>
52+
</build>
53+
</project>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package com.springdeveloper.hadoop;
2+
3+
import java.io.IOException;
4+
5+
import org.apache.hadoop.io.IntWritable;
6+
import org.apache.hadoop.io.Text;
7+
import org.apache.hadoop.mapreduce.Reducer;
8+
9+
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
10+
11+
@Override
12+
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
13+
throws IOException, InterruptedException {
14+
int sum = 0;
15+
for (IntWritable value : values) {
16+
sum += value.get();
17+
}
18+
context.write(key, new IntWritable(sum));
19+
}
20+
21+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package com.springdeveloper.hadoop;
2+
3+
import java.io.IOException;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
import org.apache.hadoop.io.IntWritable;
9+
import org.apache.hadoop.io.LongWritable;
10+
import org.apache.hadoop.io.Text;
11+
import org.apache.hadoop.mapreduce.Mapper;
12+
import org.codehaus.jackson.JsonFactory;
13+
import org.codehaus.jackson.map.ObjectMapper;
14+
import org.codehaus.jackson.type.TypeReference;
15+
16+
17+
public class TweetCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
18+
19+
private final static IntWritable ONE = new IntWritable(1);
20+
private final ObjectMapper mapper = new ObjectMapper(new JsonFactory());
21+
22+
@SuppressWarnings("unchecked")
23+
@Override
24+
protected void map(LongWritable key, Text value, Context context)
25+
throws IOException, InterruptedException {
26+
27+
Map<String, Object> tweet = mapper.readValue(value.toString(),
28+
new TypeReference<HashMap<String, Object>>(){});
29+
Map<String, Object> entities = (Map<String, Object>) tweet.get("entities");
30+
List<Map<String, Object>> hashTagEntries = null;
31+
32+
if (entities != null) {
33+
hashTagEntries = (List<Map<String, Object>>) entities.get("hashTags");
34+
}
35+
36+
if (hashTagEntries != null && hashTagEntries.size() > 0) {
37+
for (Map<String, Object> hashTagEntry : hashTagEntries) {
38+
String hashTag = hashTagEntry.get("text").toString();
39+
context.write(new Text(hashTag), ONE);
40+
}
41+
}
42+
}
43+
44+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package com.springdeveloper.hadoop;
2+
3+
import java.io.IOException;
4+
5+
import org.apache.hadoop.conf.Configuration;
6+
import org.apache.hadoop.fs.Path;
7+
import org.apache.hadoop.io.IntWritable;
8+
import org.apache.hadoop.io.Text;
9+
import org.apache.hadoop.mapreduce.Job;
10+
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11+
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
12+
import org.apache.hadoop.util.GenericOptionsParser;
13+
14+
public class TweetHashTagCounter {
15+
16+
/**
17+
* @param args
18+
* @throws IOException
19+
* @throws ClassNotFoundException
20+
* @throws InterruptedException
21+
*/
22+
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
23+
Configuration conf = new Configuration();
24+
String[] myArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
25+
if (myArgs.length != 2) {
26+
System.err.println("Usage: TweetHashTagCounter <input path> <output path>");
27+
System.exit(-1);
28+
}
29+
Job job = Job.getInstance(conf, "Tweet Hash Tag Counter");
30+
job.setJarByClass(TweetHashTagCounter.class);
31+
job.setMapperClass(TweetCountMapper.class);
32+
job.setCombinerClass(IntSumReducer.class);
33+
job.setReducerClass(IntSumReducer.class);
34+
job.setOutputKeyClass(Text.class);
35+
job.setOutputValueClass(IntWritable.class);
36+
FileInputFormat.addInputPath(job, new Path(myArgs[0]));
37+
FileOutputFormat.setOutputPath(job, new Path(myArgs[1]));
38+
System.exit(job.waitForCompletion(true) ? 0 : 1);
39+
}
40+
41+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package com.springdeveloper.hadoop;
2+
3+
import junit.framework.TestCase;
4+
5+
public class ExampleTest extends TestCase {
6+
7+
public void testReadOnce() throws Exception {
8+
assertTrue("", 12 == "Hello world!".length());
9+
}
10+
11+
}

0 commit comments

Comments
 (0)