Skip to content

Commit eb3af1b

Browse files
author
Thomas Risberg
committed
Adding Spring Batch based workflow example
1 parent ed0ff1a commit eb3af1b

24 files changed

+649
-0
lines changed

tweet-batch/.classpath

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" output="target/classes" path="src/main/java">
4+
<attributes>
5+
<attribute name="optional" value="true"/>
6+
<attribute name="maven.pomderived" value="true"/>
7+
</attributes>
8+
</classpathentry>
9+
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
10+
<attributes>
11+
<attribute name="maven.pomderived" value="true"/>
12+
</attributes>
13+
</classpathentry>
14+
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15+
<attributes>
16+
<attribute name="optional" value="true"/>
17+
<attribute name="maven.pomderived" value="true"/>
18+
</attributes>
19+
</classpathentry>
20+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
21+
<attributes>
22+
<attribute name="maven.pomderived" value="true"/>
23+
</attributes>
24+
</classpathentry>
25+
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
26+
<attributes>
27+
<attribute name="maven.pomderived" value="true"/>
28+
</attributes>
29+
</classpathentry>
30+
<classpathentry kind="output" path="target/classes"/>
31+
</classpath>

tweet-batch/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/target

tweet-batch/.project

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>tweet-batch</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.jdt.core.javabuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
<buildCommand>
14+
<name>org.eclipse.m2e.core.maven2Builder</name>
15+
<arguments>
16+
</arguments>
17+
</buildCommand>
18+
<buildCommand>
19+
<name>org.springframework.ide.eclipse.core.springbuilder</name>
20+
<arguments>
21+
</arguments>
22+
</buildCommand>
23+
</buildSpec>
24+
<natures>
25+
<nature>org.springframework.ide.eclipse.core.springnature</nature>
26+
<nature>org.eclipse.jdt.core.javanature</nature>
27+
<nature>org.eclipse.m2e.core.maven2Nature</nature>
28+
</natures>
29+
</projectDescription>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
//com.springsource.sts.config.flow.coordinates\:http\://www.springframework.org/schema/batch\:/tweet-batch/src/main/resources/META-INF/spring/app-context.xml=<?xml version\="1.0" encoding\="UTF-8"?>\n<graph>\n<element clazz\="JobModelElement" type\="job">\n<structure end\="2519" endstart\="2507" start\="1444" startend\="1477"/>\n<bounds height\="118" width\="181" x\="15" y\="17"/>\n</element>\n</graph>
2+
//com.springsource.sts.config.flow.coordinates\:http\://www.springframework.org/schema/batch\:/tweet-batch/src/main/resources/META-INF/spring/application-context.xml=<?xml version\="1.0" encoding\="UTF-8"?>\n<graph>\n<element clazz\="JobModelElement" type\="job">\n<structure end\="2603" endstart\="2591" start\="1534" startend\="1567"/>\n<bounds height\="456" width\="181" x\="294" y\="17"/>\n</element>\n<element clazz\="SplitContainerElement" type\="split">\n<structure end\="2125" endstart\="2111" start\="1675" startend\="1745"/>\n<bounds height\="222" width\="267" x\="15" y\="159"/>\n</element>\n</graph>
3+
//com.springsource.sts.config.flow.coordinates\:http\://www.springframework.org/schema/batch\:/tweet-batch/src/main/resources/META-INF/spring/export-context.xml=<?xml version\="1.0" encoding\="UTF-8"?>\n<graph/>
4+
//com.springsource.sts.config.flow.coordinates\:http\://www.springframework.org/schema/batch\:/tweet-batch/src/main/resources/META-INF/spring/pig-context.xml=<?xml version\="1.0" encoding\="UTF-8"?>\n<graph/>
5+
eclipse.preferences.version=1
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
eclipse.preferences.version=1
2+
encoding//src/main/java=UTF-8
3+
encoding//src/main/resources=UTF-8
4+
encoding/<project>=UTF-8
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
eclipse.preferences.version=1
2+
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
3+
org.eclipse.jdt.core.compiler.compliance=1.5
4+
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
5+
org.eclipse.jdt.core.compiler.source=1.5
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
activeProfiles=
2+
eclipse.preferences.version=1
3+
resolveWorkspaceProjects=true
4+
version=1

tweet-batch/.springBeans

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<beansProjectDescription>
3+
<version>1</version>
4+
<pluginVersion><![CDATA[3.3.0.201307091516-RELEASE]]></pluginVersion>
5+
<configSuffixes>
6+
<configSuffix><![CDATA[xml]]></configSuffix>
7+
</configSuffixes>
8+
<enableImports><![CDATA[false]]></enableImports>
9+
<configs>
10+
<config>src/main/resources/META-INF/spring/application-context.xml</config>
11+
</configs>
12+
<configSets>
13+
</configSets>
14+
</beansProjectDescription>

tweet-batch/pom.xml

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
5+
<modelVersion>4.0.0</modelVersion>
6+
<groupId>com.springdeveloper.hadoop</groupId>
7+
<artifactId>tweet-batch</artifactId>
8+
<version>0.1.0</version>
9+
<packaging>jar</packaging>
10+
<name>Tweet Batch</name>
11+
<description>Spring driven Hadoop Batch workflow</description>
12+
13+
<properties>
14+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15+
<spring.framework.version>3.2.4.RELEASE</spring.framework.version>
16+
<spring.hadoop.version>1.0.1.RELEASE</spring.hadoop.version>
17+
<spring.batch.version>2.2.1.RELEASE</spring.batch.version>
18+
<pig.version>0.11.1</pig.version>
19+
<hive.version>0.11.0</hive.version>
20+
</properties>
21+
22+
<dependencies>
23+
<dependency>
24+
<groupId>com.springdeveloper.hadoop</groupId>
25+
<artifactId>tweet-counts-hadoop</artifactId>
26+
<version>0.1.0</version>
27+
<scope>runtime</scope>
28+
</dependency>
29+
30+
<dependency>
31+
<groupId>org.springframework.data</groupId>
32+
<artifactId>spring-data-hadoop</artifactId>
33+
<version>${spring.hadoop.version}</version>
34+
</dependency>
35+
36+
<dependency>
37+
<groupId>org.springframework</groupId>
38+
<artifactId>spring-aop</artifactId>
39+
<version>${spring.framework.version}</version>
40+
</dependency>
41+
42+
<dependency>
43+
<groupId>org.springframework</groupId>
44+
<artifactId>spring-context-support</artifactId>
45+
<version>${spring.framework.version}</version>
46+
</dependency>
47+
48+
<dependency>
49+
<groupId>org.springframework</groupId>
50+
<artifactId>spring-jdbc</artifactId>
51+
<version>${spring.framework.version}</version>
52+
</dependency>
53+
54+
<dependency>
55+
<groupId>org.springframework</groupId>
56+
<artifactId>spring-tx</artifactId>
57+
<version>${spring.framework.version}</version>
58+
</dependency>
59+
60+
<dependency>
61+
<groupId>org.springframework.batch</groupId>
62+
<artifactId>spring-batch-core</artifactId>
63+
<version>${spring.batch.version}</version>
64+
</dependency>
65+
66+
<dependency>
67+
<groupId>org.apache.pig</groupId>
68+
<artifactId>pig</artifactId>
69+
<version>${pig.version}</version>
70+
<scope>compile</scope>
71+
</dependency>
72+
73+
<!-- Pig runtime dependencies start -->
74+
<dependency>
75+
<groupId>jline</groupId>
76+
<artifactId>jline</artifactId>
77+
<version>0.9.94</version>
78+
<scope>runtime</scope>
79+
</dependency>
80+
81+
<dependency>
82+
<groupId>com.google.guava</groupId>
83+
<artifactId>guava</artifactId>
84+
<version>r06</version>
85+
<scope>runtime</scope>
86+
</dependency>
87+
88+
<dependency>
89+
<groupId>org.antlr</groupId>
90+
<artifactId>antlr-runtime</artifactId>
91+
<version>3.1</version>
92+
<scope>runtime</scope>
93+
</dependency>
94+
95+
<dependency>
96+
<groupId>joda-time</groupId>
97+
<artifactId>joda-time</artifactId>
98+
<version>2.3</version>
99+
<scope>runtime</scope>
100+
</dependency>
101+
102+
<dependency>
103+
<groupId>dk.brics.automaton</groupId>
104+
<artifactId>automaton</artifactId>
105+
<version>1.11-8</version>
106+
</dependency>
107+
<!-- Pig runtime dependencies end -->
108+
109+
<dependency>
110+
<groupId>org.apache.hive</groupId>
111+
<artifactId>hive-jdbc</artifactId>
112+
<version>${hive.version}</version>
113+
<scope>runtime</scope>
114+
</dependency>
115+
116+
<dependency>
117+
<groupId>org.codehaus.groovy</groupId>
118+
<artifactId>groovy</artifactId>
119+
<version>1.8.5</version>
120+
<scope>runtime</scope>
121+
</dependency>
122+
123+
<dependency>
124+
<groupId>log4j</groupId>
125+
<artifactId>log4j</artifactId>
126+
<version>1.2.14</version>
127+
</dependency>
128+
</dependencies>
129+
130+
<repositories>
131+
<repository>
132+
<id>spring-milestone</id>
133+
<url>http://repo.springsource.org/libs-milestone</url>
134+
</repository>
135+
</repositories>
136+
137+
<build>
138+
<plugins>
139+
<plugin>
140+
<groupId>org.codehaus.mojo</groupId>
141+
<artifactId>appassembler-maven-plugin</artifactId>
142+
<version>1.2.2</version>
143+
<configuration>
144+
<repositoryLayout>flat</repositoryLayout>
145+
<configurationSourceDirectory>src/main/config</configurationSourceDirectory>
146+
<copyConfigurationDirectory>true</copyConfigurationDirectory>
147+
<!-- Extra JVM arguments that will be included in the bin scripts -->
148+
<extraJvmArguments>-Xms512m -Xmx1024m</extraJvmArguments>
149+
<programs>
150+
<program>
151+
<mainClass>com.springdeveloper.hadoop.batch.BatchApp</mainClass>
152+
<name>batchApp</name>
153+
</program>
154+
</programs>
155+
</configuration>
156+
<executions>
157+
<execution>
158+
<id>package</id>
159+
<goals>
160+
<goal>assemble</goal>
161+
</goals>
162+
</execution>
163+
</executions>
164+
</plugin>
165+
<plugin>
166+
<groupId>org.apache.maven.plugins</groupId>
167+
<artifactId>maven-antrun-plugin</artifactId>
168+
<executions>
169+
<execution>
170+
<id>config</id>
171+
<phase>package</phase>
172+
<configuration>
173+
<tasks>
174+
<copy todir="target/appassembler/data">
175+
<fileset dir="../data"/>
176+
</copy>
177+
</tasks>
178+
</configuration>
179+
<goals>
180+
<goal>run</goal>
181+
</goals>
182+
</execution>
183+
</executions>
184+
</plugin>
185+
</plugins>
186+
</build>
187+
188+
</project>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Direct log messages to stdout
2+
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
3+
log4j.appender.stdout.Target=System.out
4+
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5+
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n
6+
7+
# Root logger option
8+
log4j.rootLogger=INFO, stdout
9+
10+
log4j.logger.DataNucleus=OFF
11+
#log4j.logger.org.springframework=DEBUG
12+
#log4j.logger.org.apache.hadoop=DEBUG
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package com.springdeveloper.batch.item;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
import org.springframework.batch.item.file.LineMapper;
7+
import org.springframework.dao.DataIntegrityViolationException;
8+
import org.springframework.util.Assert;
9+
10+
public class HashTagMapper implements LineMapper<Map<String, Object>> {
11+
12+
public Map<String, Object> mapLine(String line, int lineNum) throws Exception {
13+
Assert.notNull(line, "Expecting line not to be null");
14+
String[] tokens = line.split("\t");
15+
if (tokens.length != 2) {
16+
throw new DataIntegrityViolationException("Expecting 2 tokens in input line: " + line);
17+
}
18+
Map<String, Object> data = new HashMap<String, Object>();
19+
data.put("hashtag", tokens[0]);
20+
data.put("count", tokens[1]);
21+
return data;
22+
}
23+
24+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package com.springdeveloper.batch.item;
2+
3+
import java.util.List;
4+
import java.util.Map;
5+
6+
import org.apache.commons.logging.Log;
7+
import org.apache.commons.logging.LogFactory;
8+
import org.springframework.batch.core.ItemWriteListener;
9+
10+
public class MyJdbcListener implements ItemWriteListener<Map<String, Object>> {
11+
12+
private static final Log log = LogFactory.getLog(MyJdbcListener.class);
13+
14+
public void afterWrite(List<? extends Map<String, Object>> m) {
15+
}
16+
17+
public void beforeWrite(List<? extends Map<String, Object>> m) {
18+
log.info("Writing to JDBC :: " + m);
19+
}
20+
21+
public void onWriteError(Exception e,
22+
List<? extends Map<String, Object>> l) {
23+
}
24+
25+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package com.springdeveloper.hadoop.batch;
2+
3+
import java.util.Date;
4+
import java.util.LinkedHashMap;
5+
import java.util.Map;
6+
import java.util.Scanner;
7+
8+
import org.apache.commons.logging.Log;
9+
import org.apache.commons.logging.LogFactory;
10+
import org.springframework.batch.core.BatchStatus;
11+
import org.springframework.batch.core.Job;
12+
import org.springframework.batch.core.JobParameter;
13+
import org.springframework.batch.core.JobParameters;
14+
import org.springframework.batch.core.JobParametersBuilder;
15+
import org.springframework.batch.core.launch.JobLauncher;
16+
import org.springframework.beans.factory.BeanInitializationException;
17+
import org.springframework.context.ApplicationContext;
18+
import org.springframework.context.support.AbstractApplicationContext;
19+
import org.springframework.context.support.ClassPathXmlApplicationContext;
20+
import org.springframework.util.Assert;
21+
22+
public class BatchApp {
23+
24+
private static final Log log = LogFactory.getLog(BatchApp.class);
25+
26+
public static void main(String[] args) throws Exception {
27+
AbstractApplicationContext context =
28+
new ClassPathXmlApplicationContext("classpath:/META-INF/spring/application-context.xml");
29+
log.info("Batch TweetCount Application Running");
30+
context.registerShutdownHook();
31+
32+
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
33+
Job job = context.getBean(Job.class);
34+
35+
jobLauncher.run(
36+
job,
37+
new JobParametersBuilder()
38+
.addString("mr.input", "/tweets/input")
39+
.addString("mr.output", "/tweets/output")
40+
.addString("localData", "data/nbatweets-small.txt")
41+
.addDate("date", new Date()).toJobParameters());
42+
context.close();
43+
}
44+
}

0 commit comments

Comments
 (0)