|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License; |
| 4 | + * you may not use this file except in compliance with the Elastic License. |
| 5 | + */ |
| 6 | + |
| 7 | +package org.elasticsearch.xpack.ingest; |
| 8 | + |
| 9 | +import org.elasticsearch.ingest.AbstractProcessor; |
| 10 | +import org.elasticsearch.ingest.ConfigurationUtils; |
| 11 | +import org.elasticsearch.ingest.IngestDocument; |
| 12 | +import org.elasticsearch.ingest.Processor; |
| 13 | + |
| 14 | +import java.net.URI; |
| 15 | +import java.net.URISyntaxException; |
| 16 | +import java.util.HashMap; |
| 17 | +import java.util.Map; |
| 18 | + |
| 19 | +public class UriPartsProcessor extends AbstractProcessor { |
| 20 | + |
| 21 | + public static final String TYPE = "uri_parts"; |
| 22 | + |
| 23 | + private final String field; |
| 24 | + private final String targetField; |
| 25 | + private final boolean removeIfSuccessful; |
| 26 | + private final boolean keepOriginal; |
| 27 | + |
| 28 | + UriPartsProcessor(String tag, String description, String field, String targetField, boolean removeIfSuccessful, boolean keepOriginal) { |
| 29 | + super(tag, description); |
| 30 | + this.field = field; |
| 31 | + this.targetField = targetField; |
| 32 | + this.removeIfSuccessful = removeIfSuccessful; |
| 33 | + this.keepOriginal = keepOriginal; |
| 34 | + } |
| 35 | + |
| 36 | + public String getField() { |
| 37 | + return field; |
| 38 | + } |
| 39 | + |
| 40 | + public String getTargetField() { |
| 41 | + return targetField; |
| 42 | + } |
| 43 | + |
| 44 | + public boolean getRemoveIfSuccessful() { |
| 45 | + return removeIfSuccessful; |
| 46 | + } |
| 47 | + |
| 48 | + public boolean getKeepOriginal() { |
| 49 | + return keepOriginal; |
| 50 | + } |
| 51 | + |
| 52 | + @Override |
| 53 | + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { |
| 54 | + String value = ingestDocument.getFieldValue(field, String.class); |
| 55 | + |
| 56 | + URI uri; |
| 57 | + try { |
| 58 | + uri = new URI(value); |
| 59 | + } catch (URISyntaxException e) { |
| 60 | + throw new IllegalArgumentException("unable to parse URI [" + value + "]"); |
| 61 | + } |
| 62 | + var uriParts = new HashMap<String, Object>(); |
| 63 | + uriParts.put("domain", uri.getHost()); |
| 64 | + if (uri.getFragment() != null) { |
| 65 | + uriParts.put("fragment", uri.getFragment()); |
| 66 | + } |
| 67 | + if (keepOriginal) { |
| 68 | + uriParts.put("original", value); |
| 69 | + } |
| 70 | + final String path = uri.getPath(); |
| 71 | + if (path != null) { |
| 72 | + uriParts.put("path", path); |
| 73 | + if (path.contains(".")) { |
| 74 | + int periodIndex = path.lastIndexOf('.'); |
| 75 | + uriParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); |
| 76 | + } |
| 77 | + } |
| 78 | + if (uri.getPort() != -1) { |
| 79 | + uriParts.put("port", uri.getPort()); |
| 80 | + } |
| 81 | + if (uri.getQuery() != null) { |
| 82 | + uriParts.put("query", uri.getQuery()); |
| 83 | + } |
| 84 | + uriParts.put("scheme", uri.getScheme()); |
| 85 | + final String userInfo = uri.getUserInfo(); |
| 86 | + if (userInfo != null) { |
| 87 | + uriParts.put("user_info", userInfo); |
| 88 | + if (userInfo.contains(":")) { |
| 89 | + int colonIndex = userInfo.indexOf(":"); |
| 90 | + uriParts.put("username", userInfo.substring(0, colonIndex)); |
| 91 | + uriParts.put("password", colonIndex < userInfo.length() ? userInfo.substring(colonIndex + 1) : ""); |
| 92 | + } |
| 93 | + } |
| 94 | + |
| 95 | + if (removeIfSuccessful && targetField.equals(field) == false) { |
| 96 | + ingestDocument.removeField(field); |
| 97 | + } |
| 98 | + ingestDocument.setFieldValue(targetField, uriParts); |
| 99 | + return ingestDocument; |
| 100 | + } |
| 101 | + |
| 102 | + @Override |
| 103 | + public String getType() { |
| 104 | + return TYPE; |
| 105 | + } |
| 106 | + |
| 107 | + public static final class Factory implements Processor.Factory { |
| 108 | + |
| 109 | + @Override |
| 110 | + public UriPartsProcessor create( |
| 111 | + Map<String, Processor.Factory> registry, |
| 112 | + String processorTag, |
| 113 | + String description, |
| 114 | + Map<String, Object> config |
| 115 | + ) throws Exception { |
| 116 | + String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); |
| 117 | + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "url"); |
| 118 | + boolean removeIfSuccessful = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_if_successful", false); |
| 119 | + boolean keepOriginal = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "keep_original", true); |
| 120 | + return new UriPartsProcessor(processorTag, description, field, targetField, removeIfSuccessful, keepOriginal); |
| 121 | + } |
| 122 | + } |
| 123 | +} |
0 commit comments