Skip to content

Commit 3d1ebde

Browse files
authored
[GH-2509] Refactor the example projects to include better examples (#2510)
1 parent 8fd55cc commit 3d1ebde

File tree

16 files changed

+838
-439
lines changed

16 files changed

+838
-439
lines changed

.github/workflows/example.yml

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ concurrency:
3939

4040
jobs:
4141
build:
42+
name: 'Spark ${{ matrix.spark }}, Hadoop ${{ matrix.hadoop }}, Sedona ${{ matrix.sedona }}'
4243
runs-on: ubuntu-22.04
4344
strategy:
4445
fail-fast: false
@@ -56,23 +57,6 @@ jobs:
5657
spark-compat: '3.4'
5758
sedona: 1.8.0
5859
hadoop: 3.3.4
59-
env:
60-
JAVA_TOOL_OPTIONS: >-
61-
-XX:+IgnoreUnrecognizedVMOptions
62-
--add-opens=java.base/java.lang=ALL-UNNAMED
63-
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
64-
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED
65-
--add-opens=java.base/java.io=ALL-UNNAMED
66-
--add-opens=java.base/java.net=ALL-UNNAMED
67-
--add-opens=java.base/java.nio=ALL-UNNAMED
68-
--add-opens=java.base/java.util=ALL-UNNAMED
69-
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED
70-
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
71-
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
72-
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED
73-
--add-opens=java.base/sun.security.action=ALL-UNNAMED
74-
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED
75-
-Djdk.reflect.useDirectMethodHandle=false
7660
steps:
7761
- uses: actions/checkout@v5
7862
- uses: actions/setup-java@v5
@@ -100,7 +84,8 @@ jobs:
10084
path: ~/.m2
10185
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
10286
restore-keys: ${{ runner.os }}-m2
103-
- env:
87+
- name: Test Scala Spark SQL Example
88+
env:
10489
SPARK_VERSION: ${{ matrix.spark }}
10590
SPARK_LOCAL_IP: 127.0.0.1
10691
SPARK_COMPAT_VERSION: ${{ matrix.spark-compat }}
@@ -109,16 +94,28 @@ jobs:
10994
run: |
11095
cd examples/spark-sql
11196
mvn versions:set -DnewVersion=${SEDONA_VERSION} -DgenerateBackupPoms=false
112-
mvn clean install \
97+
mvn clean test \
11398
-Dspark.version=${SPARK_VERSION} \
11499
-Dspark.compat.version=${SPARK_COMPAT_VERSION} \
115100
-Dsedona.version=${SEDONA_VERSION} \
116101
-Dhadoop.version=${HADOOP_VERSION}
117-
java -jar target/sedona-spark-example-${SEDONA_VERSION}.jar
118-
- env:
102+
- name: Test Java Spark SQL Example
103+
env:
104+
SPARK_VERSION: ${{ matrix.spark }}
105+
SPARK_LOCAL_IP: 127.0.0.1
106+
SPARK_COMPAT_VERSION: ${{ matrix.spark-compat }}
107+
SEDONA_VERSION: ${{ matrix.sedona }}
108+
HADOOP_VERSION: ${{ matrix.hadoop }}
109+
run: |
110+
cd examples/java-spark-sql
111+
mvn versions:set -DnewVersion=${SEDONA_VERSION} -DgenerateBackupPoms=false
112+
mvn clean test \
113+
-Dspark.version=${SPARK_VERSION} \
114+
-Dspark.compat.version=${SPARK_COMPAT_VERSION}
115+
- name: Test Flink SQL Example
116+
env:
119117
SEDONA_VERSION: ${{ matrix.sedona }}
120118
run: |
121119
cd examples/flink-sql
122120
mvn versions:set -DnewVersion=${SEDONA_VERSION} -DgenerateBackupPoms=false
123-
mvn clean install
124-
java -jar target/sedona-flink-example-${SEDONA_VERSION}.jar
121+
mvn clean test

examples/flink-sql/pom.xml

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
3232
<geotools.scope>compile</geotools.scope>
3333
<flink.version>1.19.0</flink.version>
34-
<flink.scope>compile</flink.scope>
34+
<flink.scope>provided</flink.scope>
3535
<scala.compat.version>2.12</scala.compat.version>
3636
<geotools.version>33.1</geotools.version>
3737
<log4j.version>2.17.2</log4j.version>
@@ -247,6 +247,20 @@
247247
</execution>
248248
</executions>
249249
</plugin>
250+
<plugin>
251+
<groupId>org.apache.maven.plugins</groupId>
252+
<artifactId>maven-surefire-plugin</artifactId>
253+
<version>2.22.2</version>
254+
<configuration>
255+
<argLine>
256+
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
257+
--add-opens=java.base/java.nio=ALL-UNNAMED
258+
--add-opens=java.base/java.lang=ALL-UNNAMED
259+
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
260+
--add-opens=java.base/java.util=ALL-UNNAMED
261+
</argLine>
262+
</configuration>
263+
</plugin>
250264
<plugin>
251265
<groupId>org.jacoco</groupId>
252266
<artifactId>jacoco-maven-plugin</artifactId>
@@ -266,6 +280,29 @@
266280
</execution>
267281
</executions>
268282
</plugin>
283+
<plugin>
284+
<groupId>com.diffplug.spotless</groupId>
285+
<artifactId>spotless-maven-plugin</artifactId>
286+
<version>2.35.0</version>
287+
<configuration>
288+
<java>
289+
<googleJavaFormat>
290+
<version>1.15.0</version>
291+
</googleJavaFormat>
292+
<licenseHeader>
293+
<file>../../tools/maven/license-header.txt</file>
294+
</licenseHeader>
295+
</java>
296+
</configuration>
297+
<executions>
298+
<execution>
299+
<goals>
300+
<goal>check</goal>
301+
</goals>
302+
<phase>compile</phase>
303+
</execution>
304+
</executions>
305+
</plugin>
269306
</plugins>
270307
<resources>
271308
<resource>

examples/flink-sql/src/main/java/FlinkExample.java

Lines changed: 63 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -16,70 +16,84 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19+
import static org.apache.flink.table.api.Expressions.$;
20+
import static org.apache.flink.table.api.Expressions.call;
1921

2022
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
2123
import org.apache.flink.table.api.EnvironmentSettings;
2224
import org.apache.flink.table.api.Table;
2325
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
24-
2526
import org.apache.sedona.flink.SedonaFlinkRegistrator;
2627
import org.apache.sedona.flink.expressions.Constructors;
2728

28-
import static org.apache.flink.table.api.Expressions.$;
29-
import static org.apache.flink.table.api.Expressions.call;
30-
31-
public class FlinkExample
32-
{
33-
static String[] pointColNames = {"geom_point", "name_point", "event_time", "proc_time"};
34-
35-
static String[] polygonColNames = {"geom_polygon", "name_polygon", "event_time", "proc_time"};
29+
public class FlinkExample {
30+
static String[] pointColNames = {"geom_point", "name_point", "event_time", "proc_time"};
3631

37-
public static void main(String[] args) {
38-
int testDataSize = 10;
39-
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
40-
EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
41-
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
42-
SedonaFlinkRegistrator.registerType(env);
43-
SedonaFlinkRegistrator.registerFunc(tableEnv);
32+
static String[] polygonColNames = {"geom_polygon", "name_polygon", "event_time", "proc_time"};
4433

45-
// Create a fake WKT string table source
46-
Table pointWktTable = Utils.createTextTable(env, tableEnv, Utils.createPointWKT(testDataSize), pointColNames);
34+
public static void main(String[] args) {
35+
testS2SpatialJoin(10);
36+
}
4737

48-
// Create a geometry column
49-
Table pointTable = pointWktTable.select(
50-
call("ST_GeomFromWKT", $(pointColNames[0])).as(pointColNames[0]),
51-
$(pointColNames[1]));
38+
public static void testS2SpatialJoin(int testDataSize) {
39+
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
40+
EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
41+
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
42+
SedonaFlinkRegistrator.registerType(env);
43+
SedonaFlinkRegistrator.registerFunc(tableEnv);
5244

53-
// Create S2CellID
54-
pointTable = pointTable.select($(pointColNames[0]), $(pointColNames[1]),
55-
call("ST_S2CellIDs", $(pointColNames[0]), 6).as("s2id_array"));
56-
// Explode s2id array
57-
tableEnv.createTemporaryView("pointTable", pointTable);
58-
pointTable = tableEnv.sqlQuery("SELECT geom_point, name_point, s2id_point FROM pointTable CROSS JOIN UNNEST(pointTable.s2id_array) AS tmpTbl1(s2id_point)");
45+
// Create a fake WKT string table source
46+
Table pointWktTable =
47+
Utils.createTextTable(env, tableEnv, Utils.createPointWKT(testDataSize), pointColNames);
5948

49+
// Create a geometry column
50+
Table pointTable =
51+
pointWktTable.select(
52+
call("ST_GeomFromWKT", $(pointColNames[0])).as(pointColNames[0]), $(pointColNames[1]));
6053

61-
// Create a fake WKT string table source
62-
Table polygonWktTable = Utils.createTextTable(env, tableEnv, Utils.createPolygonWKT(testDataSize), polygonColNames);
63-
// Create a geometry column
64-
Table polygonTable = polygonWktTable.select(call(Constructors.ST_GeomFromWKT.class.getSimpleName(),
65-
$(polygonColNames[0])).as(polygonColNames[0]),
66-
$(polygonColNames[1]));
67-
// Create S2CellID
68-
polygonTable = polygonTable.select($(polygonColNames[0]), $(polygonColNames[1]),
69-
call("ST_S2CellIDs", $(polygonColNames[0]), 6).as("s2id_array"));
70-
// Explode s2id array
71-
tableEnv.createTemporaryView("polygonTable", polygonTable);
72-
polygonTable = tableEnv.sqlQuery("SELECT geom_polygon, name_polygon, s2id_polygon FROM polygonTable CROSS JOIN UNNEST(polygonTable.s2id_array) AS tmpTbl2(s2id_polygon)");
54+
// Create S2CellID
55+
pointTable =
56+
pointTable.select(
57+
$(pointColNames[0]),
58+
$(pointColNames[1]),
59+
call("ST_S2CellIDs", $(pointColNames[0]), 6).as("s2id_array"));
60+
// Explode s2id array
61+
tableEnv.createTemporaryView("pointTable", pointTable);
62+
pointTable =
63+
tableEnv.sqlQuery(
64+
"SELECT geom_point, name_point, s2id_point FROM pointTable CROSS JOIN UNNEST(pointTable.s2id_array) AS tmpTbl1(s2id_point)");
7365

74-
// TODO: TableImpl.print() occurs EOF Exception due to https://issues.apache.org/jira/browse/FLINK-35406
75-
// Use polygonTable.execute().print() when FLINK-35406 is fixed.
76-
polygonTable.execute().collect().forEachRemaining(row -> System.out.println(row));
66+
// Create a fake WKT string table source
67+
Table polygonWktTable =
68+
Utils.createTextTable(env, tableEnv, Utils.createPolygonWKT(testDataSize), polygonColNames);
69+
// Create a geometry column
70+
Table polygonTable =
71+
polygonWktTable.select(
72+
call(Constructors.ST_GeomFromWKT.class.getSimpleName(), $(polygonColNames[0]))
73+
.as(polygonColNames[0]),
74+
$(polygonColNames[1]));
75+
// Create S2CellID
76+
polygonTable =
77+
polygonTable.select(
78+
$(polygonColNames[0]),
79+
$(polygonColNames[1]),
80+
call("ST_S2CellIDs", $(polygonColNames[0]), 6).as("s2id_array"));
81+
// Explode s2id array
82+
tableEnv.createTemporaryView("polygonTable", polygonTable);
83+
polygonTable =
84+
tableEnv.sqlQuery(
85+
"SELECT geom_polygon, name_polygon, s2id_polygon FROM polygonTable CROSS JOIN UNNEST(polygonTable.s2id_array) AS tmpTbl2(s2id_polygon)");
7786

78-
// Join two tables by their S2 ids
79-
Table joinResult = pointTable.join(polygonTable).where($("s2id_point").isEqual($("s2id_polygon")));
80-
// Optional: remove false positives
81-
joinResult = joinResult.where(call("ST_Contains", $("geom_polygon"), $("geom_point")));
82-
joinResult.execute().collect().forEachRemaining(row -> System.out.println(row));
83-
}
87+
// TODO: TableImpl.print() occurs EOF Exception due to
88+
// https://issues.apache.org/jira/browse/FLINK-35406
89+
// Use polygonTable.execute().print() when FLINK-35406 is fixed.
90+
polygonTable.execute().collect().forEachRemaining(row -> System.out.println(row));
8491

92+
// Join two tables by their S2 ids
93+
Table joinResult =
94+
pointTable.join(polygonTable).where($("s2id_point").isEqual($("s2id_polygon")));
95+
// Optional: remove false positives
96+
joinResult = joinResult.where(call("ST_Contains", $("geom_polygon"), $("geom_point")));
97+
joinResult.execute().collect().forEachRemaining(row -> System.out.println(row));
98+
}
8599
}

0 commit comments

Comments (0)