Skip to content

Extend Filters, extract Ids #509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
* A filter which selects OSM contributions by matching to a list of changeset ids.
*/
public class ChangesetIdFilterEqualsAnyOf extends NegatableFilter {
private final Collection<Long> changesetIdList;
private final Set<Long> changesetIds;

ChangesetIdFilterEqualsAnyOf(@Nonnull Collection<Long> changesetIdList) {
this(new HashSet<>(changesetIdList));
}
ChangesetIdFilterEqualsAnyOf(@Nonnull Set<Long> changesetIds) {
super(new FilterInternal() {
private final Set<Long> changesetIds = new HashSet<>(changesetIdList);

@Override
public boolean applyOSH(OSHEntity entity) {
Expand All @@ -37,15 +39,15 @@ public boolean applyOSMEntitySnapshot(OSMEntitySnapshot ignored) {

@Override
public String toString() {
return "changeset:in(" + changesetIdList.stream().map(String::valueOf)
return "changeset:in(" + changesetIds.stream().map(String::valueOf)
.collect(Collectors.joining(",")) + ")";
}
});
this.changesetIdList = changesetIdList;
this.changesetIds = changesetIds;
}

@Contract(pure = true)
public Collection<Long> getChangesetIdList() {
return this.changesetIdList;
public Set<Long> getChangesetIdList() {
return this.changesetIds;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
* A filter which selects OSM contributions by matching to a range of changeset ids.
*/
public class ChangesetIdFilterRange extends NegatableFilter {
private final IdRange changesetIdRange;

ChangesetIdFilterRange(IdRange changesetIdRange) {
super(new FilterInternal() {
@Override
Expand All @@ -30,5 +32,10 @@ public String toString() {
return "changeset:in-range" + changesetIdRange;
}
});
this.changesetIdRange = changesetIdRange;
}

public IdRange getChangesetIdRange() {
return changesetIdRange;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ default boolean applyOSMGeometry(OSMEntity entity, Geometry geometry) {
* Apply a filter to a snapshot ({@link OSMEntitySnapshot}) of an OSM entity.
*
* @param snapshot a snapshot of the OSM entity to check
* @return true if the the OSM entity snapshot fulfills the specified filter, otherwise false.
* @return true if the OSM entity snapshot fulfills the specified filter, otherwise false.
*/
@Contract(pure = true)
default boolean applyOSMEntitySnapshot(OSMEntitySnapshot snapshot) {
Expand All @@ -95,7 +95,7 @@ default boolean applyOSMEntitySnapshot(OSMEntitySnapshot snapshot) {
* modification or the state of it after the modification matches the filter.</p>
*
* @param contribution a modification of the OSM entity to check
* @return true if the the OSM contribution fulfills the specified filter, otherwise false.
* @return true if the OSM contribution fulfills the specified filter, otherwise false.
*/
@Contract(pure = true)
default boolean applyOSMContribution(OSMContribution contribution) {
Expand Down Expand Up @@ -133,11 +133,11 @@ default boolean applyOSMContribution(OSMContribution contribution) {
*/
@Contract(pure = true)
default List<List<Filter>> normalize() {
if (this instanceof Filter) {
return Collections.singletonList(Collections.singletonList((Filter) this));
} else if (this instanceof AndOperator) {
List<List<Filter>> exp1 = ((BinaryOperator) this).getLeftOperand().normalize();
List<List<Filter>> exp2 = ((BinaryOperator) this).getRightOperand().normalize();
if (this instanceof Filter filter) {
return Collections.singletonList(Collections.singletonList(filter));
} else if (this instanceof AndOperator operator) {
List<List<Filter>> exp1 = operator.getLeftOperand().normalize();
List<List<Filter>> exp2 = operator.getRightOperand().normalize();
// return cross product of exp1 and exp2
List<List<Filter>> combined = new LinkedList<>();
for (List<Filter> e1 : exp1) {
Expand All @@ -149,9 +149,9 @@ default List<List<Filter>> normalize() {
}
}
return combined;
} else if (this instanceof OrOperator) {
List<List<Filter>> exp1 = ((BinaryOperator) this).getLeftOperand().normalize();
List<List<Filter>> exp2 = ((BinaryOperator) this).getRightOperand().normalize();
} else if (this instanceof OrOperator operator) {
List<List<Filter>> exp1 = operator.getLeftOperand().normalize();
List<List<Filter>> exp2 = operator.getRightOperand().normalize();
List<List<Filter>> combined = new ArrayList<>(exp1.size() + exp2.size());
combined.addAll(exp1);
combined.addAll(exp2);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package org.heigit.ohsome.oshdb.filter;

import org.heigit.ohsome.oshdb.OSHDBTag;
import org.heigit.ohsome.oshdb.osm.OSMType;

import java.util.*;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Stream;

import static java.util.Collections.*;
import static java.util.Comparator.comparingLong;

public class FilterUtil {

private FilterUtil() {
throw new IllegalStateException("utility class");
}

public static Map<EnumSet<OSMType>, Set<OSHDBTag>> tags(FilterExpression expression, Predicate<OSHDBTag> test) {
return tags(expression.normalize(), test);
}

public static Map<EnumSet<OSMType>, Set<OSHDBTag>> tags(List<List<Filter>> normalized, Predicate<OSHDBTag> test) {
return extract(normalized, filter -> {
if (filter instanceof TagFilterEquals tagFilter) {
return Stream.of(tagFilter.getTag()).filter(test);
} else if (filter instanceof TagFilterEqualsAny tagFilter) {
return Stream.of(tagFilter.getTag()).map(key -> new OSHDBTag(key.toInt(), -1)).filter(test);
} else if (filter instanceof TagFilterEqualsAnyOf tagFilter) {
return tagFilter.tags.stream().filter(test);
}
return Stream.empty();
});
}

private static <T> Map<EnumSet<OSMType>, Set<T>> extract(List<List<Filter>> normalized, Function<Filter, Stream<T>> extractor) {
var result = new HashMap<EnumSet<OSMType>, Set<T>>();
for(var group: normalized){
var type = EnumSet.allOf(OSMType.class);
var groupResult = new HashSet<T>();
for (var filter : group) {
if (filter instanceof TypeFilter typeFilter) {
type = EnumSet.of(typeFilter.getType());
} else {
Comment on lines +43 to +45
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is missing a case: GeometryTypeFilters do implicitly also filter by type (e.g. geometry: polygon is equivalent to something like (type:way or (type:relation and type=multipolygon)) and <actual geometry check>.

Something like this should work:

Suggested change
if (filter instanceof TypeFilter typeFilter) {
type = EnumSet.of(typeFilter.getType());
} else {
if (filter instanceof TypeFilter typeFilter) {
type.retainAll(EnumSet.of(typeFilter.getType()));
} else if (filter instanceof GeometryTypeFilter geometryFilter) {
type.retainAll(geometryFilter.getOSMTypes());
} else {

Another question is if there are cases where we want the extractor to be called with GeometryTypeFilters. 🤔

extractor.apply(filter).forEach(groupResult::add);
}
}
if (groupResult.isEmpty()) {
return emptyMap();
}
result.computeIfAbsent(type, x -> new HashSet<>()).addAll(groupResult);
}
return result;
}

public static Map<EnumSet<OSMType>, Set<IdRange>> extractIds(FilterExpression expression) {
return extractIds(expression.normalize());
}

public static Map<EnumSet<OSMType>, Set<IdRange>> extractIds(List<List<Filter>> normalized) {
var result = extract(normalized, filter -> {
if (filter instanceof IdFilterEquals idFilter){
return Stream.of(new IdRange(idFilter.getId()));
} else if (filter instanceof IdFilterEqualsAnyOf idFilter) {
return idFilter.getIds().stream().map(IdRange::new);
} else if (filter instanceof IdFilterRange idFilter) {
return Stream.of(idFilter.getRange());
}
return Stream.empty();
});
compactRanges(result);
return result;
}

private static void compactRanges(Map<EnumSet<OSMType>, Set<IdRange>> result) {
for (var entry : result.entrySet()) {
var ranges = new ArrayList<>(entry.getValue());
var compact = new HashSet<IdRange>(ranges.size());
ranges.sort(comparingLong(IdRange::getFromId));
var itr = ranges.iterator();
var range = itr.next();
while (itr.hasNext()) {
var next = itr.next();
if (next.getFromId() <= range.getToId() + 1) {
range = new IdRange(range.getFromId(), next.getToId());
} else {
compact.add(range);
range = next;
}
}
compact.add(range);
entry.setValue(compact);
}
}
Copy link
Member

@tyrasd tyrasd Jun 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could move the code from optimizeFilters1 to this class?!

    public static Set<OSMType> extractTypes(FilterExpression expression) {
        return extractTypes(expression.normalize());
    }

    public static Set<OSMType> extractTypes(List<List<Filter>> normalized) {
        var allTypes = EnumSet.noneOf(OSMType.class);
        extract(normalized, ignored -> Stream.of(true)).keySet()
                .forEach(allTypes::addAll);
        return allTypes;
    }

and in MapReducer.optimizeFilters1:

    mapRed = mapRed.osmTypeInternal(FilterUtil.extractTypes(filter));

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,19 @@
* A tag filter which executes a "id [not] in (id1, id2, …)" check.
*/
public class IdFilterEqualsAnyOf extends NegatableFilter {

private final Set<Long> ids;

IdFilterEqualsAnyOf(@Nonnull Collection<Long> idList) {
this(new HashSet<>(idList));
}

IdFilterEqualsAnyOf(@Nonnull Set<Long> ids) {
super(new FilterInternal() {
private final Set<Long> ids = new HashSet<>(idList);

@Override
public boolean applyOSH(OSHEntity entity) {
return this.ids.contains(entity.getId());
return ids.contains(entity.getId());
}

@Override
Expand All @@ -28,7 +34,7 @@ boolean applyOSHNegated(OSHEntity entity) {

@Override
public boolean applyOSM(OSMEntity entity) {
return this.ids.contains(entity.getId());
return ids.contains(entity.getId());
}

@Override
Expand All @@ -38,12 +44,18 @@ boolean applyOSMNegated(OSMEntity entity) {

@Override
public String toString() {
return "id:in" + this.ids.stream().map(String::valueOf).collect(Collectors.joining(","));
return "id:in" + ids.stream().map(String::valueOf).collect(Collectors.joining(","));
}
});

if (idList.isEmpty()) {
if (ids.isEmpty()) {
throw new IllegalStateException("list of ids must not be empty in a id in (list) filter");
}

this.ids = ids;
}

public Set<Long> getIds() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could be included in ParseTest.testIdFilterEqualsAnyOf to check if it returns the expected values

return ids;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
* A filter which executes a "id [not] in range" check.
*/
public class IdFilterRange extends NegatableFilter {
private final IdRange range;

IdFilterRange(@Nonnull IdRange range) {
super(new FilterInternal() {
@Override
Expand Down Expand Up @@ -35,5 +37,10 @@ public String toString() {
return "id:in-range" + range;
}
});
this.range = range;
}

public IdRange getRange() {
return range;
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package org.heigit.ohsome.oshdb.filter;

import java.io.Serializable;
import java.util.Objects;
import java.util.stream.LongStream;

/**
* Helper class to handle ranges of ids (incl. user ids, changeset ids, etc.).
*
* <p>The range's limits are tested inclusively: the range (10..12) would match the values 10,
* 11 and 12, but not 9 or 13 for example.</p>
*/
class IdRange implements Serializable {
public class IdRange implements Serializable {

private final long fromId;
private final long toId;
Expand All @@ -19,11 +21,15 @@ class IdRange implements Serializable {
* @param fromId lower limit of the range.
* @param toId upper limit of the range.
*/
IdRange(long fromId, long toId) {
public IdRange(long fromId, long toId) {
this.fromId = Math.min(fromId, toId);
this.toId = Math.max(fromId, toId);
}

public IdRange(long id) {
this(id, id);
}

/** Checks if the given id falls into the id range. */
public boolean test(long id) {
return id >= fromId && id <= toId;
Expand All @@ -34,4 +40,33 @@ public String toString() {
+ ".."
+ (toId == Long.MAX_VALUE ? "" : toId);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof IdRange idRange)) return false;
return fromId == idRange.fromId && toId == idRange.toId;
}

@Override
public int hashCode() {
return Objects.hash(fromId, toId);
}

public long getFromId() {
return fromId;
}

public long getToId() {
return toId;
}

public long size() {
return 1 + toId - fromId;
}

/** Converts the id range to a stream of consecutive ids. */
public LongStream getIds() {
return LongStream.range(fromId, toId+1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@ abstract class TagFilterAnyOf implements Filter {

TagFilterAnyOf(@Nonnull Collection<OSHDBTag> tags) {
Optional<OSHDBTag> firstTag = tags.stream().findFirst();
if (!firstTag.isPresent()) {
throw new IllegalStateException("list of tags must not be empty in a key in (values) filter");
} else {
this.keyId = firstTag.get().getKey();
this.tags = new HashSet<>(tags);
}
this.keyId = firstTag
.orElseThrow(() -> new IllegalStateException("list of tags must not be empty in a key in (values) filter"))
.getKey();
this.tags = new HashSet<>(tags);

if (!tags.stream().allMatch(tag -> tag.getKey() == this.keyId)) {
throw new IllegalStateException(
"list of tags must all share the same tag key in a key in (values) filter");
}
}

public int getKeyId() {
return keyId;
}

public Set<OSHDBTag> getTags() {
return tags;
}
}
Loading