From 1ef63ada25192901321643cd6b52c600dcab4e3f Mon Sep 17 00:00:00 2001
From: Heemin Kim
Date: Fri, 7 Jun 2024 19:14:05 -0700
Subject: [PATCH] Switch to iterative version of WKT format parser

Signed-off-by: Heemin Kim
---
 CHANGELOG.md                                  |  1 +
 .../geometry/utils/WellKnownText.java         | 90 +++++++++++++++++--
 2 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 539f5a6628dac..dba00aca0df77 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,5 +65,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix double invocation of postCollection when MultiBucketCollector is present ([#14015](https://github.com/opensearch-project/OpenSearch/pull/14015))
 
 ### Security
+- Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086))
 
 [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.13...2.x
diff --git a/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java b/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java
index ed1d63e6d4fef..c8f6681214ce2 100644
--- a/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java
+++ b/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java
@@ -53,6 +53,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
+import java.util.Stack;
 
 /**
  * Utility class for converting to and from WKT
@@ -301,16 +302,95 @@ private Geometry parseGeometry(StreamTokenizer stream) throws IOException, Parse
         throw new IllegalArgumentException("Unknown geometry type: " + type);
     }
 
+    /**
+     * Iterative version of
+     * the previous recursive implementation (which re-entered {@code parseGeometry} for every member),
+     * to avoid StackOverflowError when there is a deeply nested structure of GeometryCollection.
+     */
     private GeometryCollection<Geometry> parseGeometryCollection(StreamTokenizer stream) throws IOException, ParseException {
         if (nextEmptyOrOpen(stream).equals(EMPTY)) {
             return GeometryCollection.EMPTY;
         }
-        List<Geometry> shapes = new ArrayList<>();
-        shapes.add(parseGeometry(stream));
-        while (nextCloserOrComma(stream).equals(COMMA)) {
-            shapes.add(parseGeometry(stream));
+
+        List<Geometry> topLevelShapes = new ArrayList<>();
+        Stack<List<Geometry>> stack = new Stack<>(); // member lists of the enclosing levels still being parsed
+        stack.push(topLevelShapes);
+        boolean isFirstIteration = true;
+        List<Geometry> currentLevelShapes = null;
+        while (!stack.isEmpty()) {
+            List<Geometry> previousShapes = stack.pop();
+            if (currentLevelShapes != null) {
+                previousShapes.add(new GeometryCollection<>(currentLevelShapes)); // attach the nested level that just ended to its parent
+            }
+            currentLevelShapes = previousShapes;
+
+            if (isFirstIteration == true) {
+                isFirstIteration = false;
+            } else {
+                if (!nextCloserOrComma(stream).equals(COMMA)) {
+                    continue; // no comma follows: this level has ended too
+                }
+            }
+            while (true) {
+                final String type = nextWord(stream).toLowerCase(Locale.ROOT);
+                switch (type) {
+                    case "point":
+                        currentLevelShapes.add(parsePoint(stream));
+                        break;
+                    case "multipoint":
+                        currentLevelShapes.add(parseMultiPoint(stream));
+                        break;
+                    case "linestring":
+                        currentLevelShapes.add(parseLine(stream));
+                        break;
+                    case "multilinestring":
+                        currentLevelShapes.add(parseMultiLine(stream));
+                        break;
+                    case "polygon":
+                        currentLevelShapes.add(parsePolygon(stream));
+                        break;
+                    case "multipolygon":
+                        currentLevelShapes.add(parseMultiPolygon(stream));
+                        break;
+                    case "bbox":
+                        currentLevelShapes.add(parseBBox(stream));
+                        break;
+                    case "geometrycollection":
+                        if (nextEmptyOrOpen(stream).equals(EMPTY)) {
+                            currentLevelShapes.add(GeometryCollection.EMPTY);
+                            break;
+                        } else {
+                            stack.push(currentLevelShapes); // non-empty nested collection: park the parent level
+                            currentLevelShapes = new ArrayList<>();
+                            continue; // next token is the nested collection's first member
+                        }
+                    case "circle": // Not part of the standard, but we need it for internal serialization
+                        currentLevelShapes.add(parseCircle(stream));
+                        break;
+                    default:
+                        throw new IllegalArgumentException("Unknown geometry type: " + type);
+                }
+
+                if (!nextCloserOrComma(stream).equals(COMMA)) {
+                    break;
+                }
+            }
         }
-        return new GeometryCollection<>(shapes);
+
+        return new GeometryCollection<>(topLevelShapes);
     }
 
     private Point parsePoint(StreamTokenizer stream) throws IOException, ParseException {