@@ -30,10 +30,27 @@ include::install_remove.asciidoc[]
30
30
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
31
31
|======
32
32
33
- For example, this:
33
+ [discrete]
34
+ [[ingest-attachment-json-ex]]
35
+ ==== Example
36
+
37
+ If attaching files to JSON documents, you must first encode the file as a base64
38
+ string. On Unix-like systems, you can do this using the `base64` command:
39
+
40
+ [source,shell]
41
+ ----
42
+ base64 -i myfile.rtf
43
+ ----
44
+
45
+ The command returns the base64-encoded string for the file. The following base64
46
+ string is for an `.rtf` file containing the text `Lorem ipsum dolor sit amet`:
47
+ `e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=`.
48
+
49
+ Use an attachment processor to decode the string and extract the file's
50
+ properties:
34
51
35
52
[source,console]
36
- --------------------------------------------------
53
+ ----
37
54
PUT _ingest/pipeline/attachment
38
55
{
39
56
"description" : "Extract attachment information",
@@ -45,20 +62,20 @@ PUT _ingest/pipeline/attachment
45
62
}
46
63
]
47
64
}
48
- PUT my-index-00001 /_doc/my_id?pipeline=attachment
65
+ PUT my-index-000001/_doc/my_id?pipeline=attachment
49
66
{
50
67
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
51
68
}
52
- GET my-index-00001 /_doc/my_id
53
- --------------------------------------------------
69
+ GET my-index-000001/_doc/my_id
70
+ ----
54
71
55
- Returns this :
72
+ The document's `attachment` object contains extracted properties for the file:
56
73
57
74
[source,console-result]
58
- --------------------------------------------------
75
+ ----
59
76
{
60
77
"found": true,
61
- "_index": "my-index-00001 ",
78
+ "_index": "my-index-000001",
62
79
"_type": "_doc",
63
80
"_id": "my_id",
64
81
"_version": 1,
@@ -74,14 +91,13 @@ Returns this:
74
91
}
75
92
}
76
93
}
77
- --------------------------------------------------
94
+ ----
78
95
// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/]
79
96
80
-
81
- To specify only some fields to be extracted:
97
+ To extract only certain `attachment` fields, specify the `properties` array:
82
98
83
99
[source,console]
84
- --------------------------------------------------
100
+ ----
85
101
PUT _ingest/pipeline/attachment
86
102
{
87
103
"description" : "Extract attachment information",
@@ -94,7 +110,7 @@ PUT _ingest/pipeline/attachment
94
110
}
95
111
]
96
112
}
97
- --------------------------------------------------
113
+ ----
98
114
99
115
NOTE: Extracting contents from binary data is a resource-intensive operation and
100
116
consumes a lot of resources. It is highly recommended to run pipelines
@@ -175,11 +191,11 @@ PUT _ingest/pipeline/attachment
175
191
}
176
192
]
177
193
}
178
- PUT my-index-00001 /_doc/my_id?pipeline=attachment
194
+ PUT my-index-000001/_doc/my_id?pipeline=attachment
179
195
{
180
196
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
181
197
}
182
- GET my-index-00001 /_doc/my_id
198
+ GET my-index-000001/_doc/my_id
183
199
--------------------------------------------------
184
200
185
201
Returns this:
@@ -188,7 +204,7 @@ Returns this:
188
204
--------------------------------------------------
189
205
{
190
206
"found": true,
191
- "_index": "my-index-00001 ",
207
+ "_index": "my-index-000001",
192
208
"_type": "_doc",
193
209
"_id": "my_id",
194
210
"_version": 1,
@@ -223,12 +239,12 @@ PUT _ingest/pipeline/attachment
223
239
}
224
240
]
225
241
}
226
- PUT my-index-00001 /_doc/my_id_2?pipeline=attachment
242
+ PUT my-index-000001/_doc/my_id_2?pipeline=attachment
227
243
{
228
244
"data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
229
245
"max_size": 5
230
246
}
231
- GET my-index-00001 /_doc/my_id_2
247
+ GET my-index-000001/_doc/my_id_2
232
248
--------------------------------------------------
233
249
234
250
Returns this:
@@ -237,7 +253,7 @@ Returns this:
237
253
--------------------------------------------------
238
254
{
239
255
"found": true,
240
- "_index": "my-index-00001 ",
256
+ "_index": "my-index-000001",
241
257
"_type": "_doc",
242
258
"_id": "my_id_2",
243
259
"_version": 1,
@@ -309,7 +325,7 @@ PUT _ingest/pipeline/attachment
309
325
}
310
326
]
311
327
}
312
- PUT my-index-00001 /_doc/my_id?pipeline=attachment
328
+ PUT my-index-000001/_doc/my_id?pipeline=attachment
313
329
{
314
330
"attachments" : [
315
331
{
@@ -322,15 +338,15 @@ PUT my-index-00001/_doc/my_id?pipeline=attachment
322
338
}
323
339
]
324
340
}
325
- GET my-index-00001 /_doc/my_id
341
+ GET my-index-000001/_doc/my_id
326
342
--------------------------------------------------
327
343
328
344
Returns this:
329
345
330
346
[source,console-result]
331
347
--------------------------------------------------
332
348
{
333
- "_index" : "my-index-00001 ",
349
+ "_index" : "my-index-000001",
334
350
"_type" : "_doc",
335
351
"_id" : "my_id",
336
352
"_version" : 1,
0 commit comments