Skip to content

ingest: Dissect processor and lib for 6x #33422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
431 changes: 312 additions & 119 deletions docs/reference/ingest/ingest-node.asciidoc

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions libs/dissect/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import org.elasticsearch.gradle.precommit.PrecommitTasks

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

archivesBaseName = 'elasticsearch-dissect'

dependencies {
if (isEclipse == false || project.path == ":libs:dissect-tests") {
testCompile("org.elasticsearch.test:framework:${version}") {
exclude group: 'org.elasticsearch', module: 'dissect'
}
}
testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
testCompile("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
testCompile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}")
}

forbiddenApisMain {
replaceSignatureFiles 'jdk-signatures'
}

if (isEclipse) {
// in eclipse the project is under a fake root, we need to change around the source sets
sourceSets {
if (project.path == ":libs:dissect") {
main.java.srcDirs = ['java']
main.resources.srcDirs = ['resources']
} else {
test.java.srcDirs = ['java']
test.resources.srcDirs = ['resources']
}
}
}
3 changes: 3 additions & 0 deletions libs/dissect/src/main/eclipse-build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

// this is just shell gradle file for eclipse to have separate projects for dissect src and tests
apply from: '../../build.gradle'
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.dissect;

/**
* Parent class for all dissect related exceptions. Consumers may catch this exception or more specific child exceptions.
*/
public abstract class DissectException extends RuntimeException {
DissectException(String message) {
super(message);
}

/**
* Error while parsing a dissect pattern
*/
static class PatternParse extends DissectException {
PatternParse(String pattern, String reason) {
super("Unable to parse pattern: " + pattern + " Reason: " + reason);
}
}

/**
* Error while parsing a dissect key
*/
static class KeyParse extends DissectException {
KeyParse(String key, String reason) {
super("Unable to parse key: " + key + " Reason: " + reason);
}
}

/**
* Unable to find a match between pattern and source string
*/
static class FindMatch extends DissectException {
FindMatch(String pattern, String source) {
super("Unable to find match for dissect pattern: " + pattern + " against source: " + source);

}
}
}
191 changes: 191 additions & 0 deletions libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.dissect;

import java.util.EnumSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* <p>A Key of a dissect pattern. This class models the name and modifiers and provides some validation.</p>
* <p>For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are:
* <ul>
* <li>{@code a}</li>
* <li>{@code +a}</li>
* <li>{@code b}</li>
* </ul>
* This class represents a single key.
* <p>A single key is composed of a name and it's modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier.
* @see DissectParser
*/
public final class DissectKey {
private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL);
private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL);
private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL);
private final Modifier modifier;
private boolean skip;
private boolean skipRightPadding;
private int appendPosition;
private String name;

/**
* Constructor - parses the String key into it's name and modifier(s)
*
* @param key The key without the leading <code>%{</code> or trailing <code>}</code>, for example {@code a->}
*/
DissectKey(String key) {
skip = key == null || key.isEmpty();
modifier = Modifier.findModifier(key);
switch (modifier) {
case NONE:
Matcher matcher = RIGHT_PADDING_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(1);
skipRightPadding = matcher.group(2) != null;
}
skip = name.isEmpty();
break;
case NAMED_SKIP:
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
}
skip = true;
break;
case APPEND:
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
}
break;
case FIELD_NAME:
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
}
break;
case FIELD_VALUE:
matcher = LEFT_MODIFIER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(2);
skipRightPadding = matcher.group(3) != null;
}
break;
case APPEND_WITH_ORDER:
matcher = APPEND_WITH_ORDER_PATTERN.matcher(key);
while (matcher.find()) {
name = matcher.group(1);
appendPosition = Short.valueOf(matcher.group(3));
skipRightPadding = matcher.group(4) != null;
}
break;
}

if (name == null || (name.isEmpty() && !skip)) {
throw new DissectException.KeyParse(key, "The key name could be determined");
}
}

/**
* Copy constructor to explicitly override the modifier.
* @param key The key to copy (except for the modifier)
* @param modifier the modifer to use for this copy
*/
DissectKey(DissectKey key, DissectKey.Modifier modifier){
this.modifier = modifier;
this.skipRightPadding = key.skipRightPadding;
this.skip = key.skip;
this.name = key.name;
this.appendPosition = key.appendPosition;
}

Modifier getModifier() {
return modifier;
}

boolean skip() {
return skip;
}

boolean skipRightPadding() {
return skipRightPadding;
}

int getAppendPosition() {
return appendPosition;
}

String getName() {
return name;
}

//generated
@Override
public String toString() {
return "DissectKey{" +
"modifier=" + modifier +
", skip=" + skip +
", appendPosition=" + appendPosition +
", name='" + name + '\'' +
'}';
}

public enum Modifier {
NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"), NAMED_SKIP("?");

private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]");

private final String modifier;

@Override
public String toString() {
return modifier;
}

Modifier(final String modifier) {
this.modifier = modifier;
}

//package private for testing
static Modifier fromString(String modifier) {
return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier))
.findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen
}

private static Modifier findModifier(String key) {
Modifier modifier = Modifier.NONE;
if (key != null && !key.isEmpty()) {
Matcher matcher = MODIFIER_PATTERN.matcher(key);
int matches = 0;
while (matcher.find()) {
Modifier priorModifier = modifier;
modifier = Modifier.fromString(matcher.group());
if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) {
throw new DissectException.KeyParse(key, "multiple modifiers are not allowed.");
}
}
}
return modifier;
}
}
}
Loading