elastic
diff --git a/‎buildSrc/src/main/resources/checkstyle_suppressions.xml‎
Lines changed: 0 additions & 2 deletions b/‎buildSrc/src/main/resources/checkstyle_suppressions.xml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎docs/reference/how-to/general.asciidoc‎
Lines changed: 0 additions & 91 deletions b/‎docs/reference/how-to/general.asciidoc‎
Lines changed: 0 additions & 91 deletions
diff --git a/‎docs/reference/index-modules/similarity.asciidoc‎
Lines changed: 12 additions & 2 deletions b/‎docs/reference/index-modules/similarity.asciidoc‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎docs/reference/mapping/dynamic/field-mapping.asciidoc‎
Lines changed: 1 addition & 1 deletion b/‎docs/reference/mapping/dynamic/field-mapping.asciidoc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/reference/mapping/dynamic/templates.asciidoc‎
Lines changed: 16 additions & 5 deletions b/‎docs/reference/mapping/dynamic/templates.asciidoc‎
Lines changed: 16 additions & 5 deletions
diff --git a/‎docs/reference/query-dsl/query-string-syntax.asciidoc‎
Lines changed: 4 additions & 21 deletions b/‎docs/reference/query-dsl/query-string-syntax.asciidoc‎
Lines changed: 4 additions & 21 deletions
diff --git a/‎modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/support/MultiValuesSource.java‎
Lines changed: 1 addition & 1 deletion b/‎modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/support/MultiValuesSource.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java‎
Lines changed: 1 addition & 1 deletion b/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateObjectValueSource.java‎
Lines changed: 1 addition & 1 deletion b/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateObjectValueSource.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java‎
Lines changed: 1 addition & 1 deletion b/‎modules/lang-expression/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java‎
Lines changed: 1 addition & 1 deletion
@@ -524,8 +524,6 @@
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]settings[/\\]ClusterSettingsIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]shards[/\\]ClusterSearchShardsIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]structure[/\\]RoutingIteratorTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]blobstore[/\\]FsBlobStoreContainerTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]blobstore[/\\]FsBlobStoreTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]breaker[/\\]MemoryCircuitBreakerTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]geo[/\\]ShapeBuilderTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]hash[/\\]MessageDigestsTests.java" checks="LineLength" />
 
@@ -40,94 +40,3 @@ better. For instance if a user searches for two words `foo` and `bar`, a match
 across different chapters is probably very poor, while a match within the same
 paragraph is likely good.
 
-[float]
-[[sparsity]]
-=== Avoid sparsity
-
-The data-structures behind Lucene, which Elasticsearch relies on in order to
-index and store data, work best with dense data, ie. when all documents have the
-same fields. This is especially true for fields that have norms enabled (which
-is the case for `text` fields by default) or doc values enabled (which is the
-case for numerics, `date`, `ip` and `keyword` by default).
-
-The reason is that Lucene internally identifies documents with so-called doc
-ids, which are integers between 0 and the total number of documents in the
-index. These doc ids are used for communication between the internal APIs of
-Lucene: for instance searching on a term with a `match` query produces an
-iterator of doc ids, and these doc ids are then used to retrieve the value of
-the `norm` in order to compute a score for these documents. The way this `norm`
-lookup is implemented currently is by reserving one byte for each document.
-The `norm` value for a given doc id can then be retrieved by reading the
-byte at index `doc_id`. While this is very efficient and helps Lucene quickly
-have access to the `norm` values of every document, this has the drawback that
-documents that do not have a value will also require one byte of storage.
-
-In practice, this means that if an index has `M` documents, norms will require
-`M` bytes of storage *per field*, even for fields that only appear in a small
-fraction of the documents of the index. Although slightly more complex with doc
-values due to the fact that doc values have multiple ways that they can be
-encoded depending on the type of field and on the actual data that the field
-stores, the problem is very similar. In case you wonder: `fielddata`, which was
-used in Elasticsearch pre-2.0 before being replaced with doc values, also
-suffered from this issue, except that the impact was only on the memory
-footprint since `fielddata` was not explicitly materialized on disk.
-
-Note that even though the most notable impact of sparsity is on storage
-requirements, it also has an impact on indexing speed and search speed since
-these bytes for documents that do not have a field still need to be written
-at index time and skipped over at search time.
-
-It is totally fine to have a minority of sparse fields in an index. But beware
-that if sparsity becomes the rule rather than the exception, then the index
-will not be as efficient as it could be.
-
-This section mostly focused on `norms` and `doc values` because those are the
-two features that are most affected by sparsity. Sparsity also affect the
-efficiency of the inverted index (used to index `text`/`keyword` fields) and
-dimensional points (used to index `geo_point` and numerics) but to a lesser
-extent.
-
-Here are some recommendations that can help avoid sparsity:
-
-[float]
-==== Avoid putting unrelated data in the same index
-
-You should avoid putting documents that have totally different structures into
-the same index in order to avoid sparsity. It is often better to put these
-documents into different indices, you could also consider giving fewer shards
-to these smaller indices since they will contain fewer documents overall.
-
-Note that this advice does not apply in the case that you need to use
-parent/child relations between your documents since this feature is only
-supported on documents that live in the same index.
-
-[float]
-==== Normalize document structures
-
-Even if you really need to put different kinds of documents in the same index,
-maybe there are opportunities to reduce sparsity. For instance if all documents
-in the index have a timestamp field but some call it `timestamp` and others
-call it `creation_date`, it would help to rename it so that all documents have
-the same field name for the same data.
-
-[float]
-==== Avoid types
-
-Types might sound like a good way to store multiple tenants in a single index.
-They are not: given that types store everything in a single index, having
-multiple types that have different fields in a single index will also cause
-problems due to sparsity as described above. If your types do not have very
-similar mappings, you might want to consider moving them to a dedicated index.
-
-[float]
-==== Disable `norms` and `doc_values` on sparse fields
-
-If none of the above recommendations apply in your case, you might want to
-check whether you actually need `norms` and `doc_values` on your sparse fields.
-`norms` can be disabled if producing scores is not necessary on a field, this is
-typically true for fields that are only used for filtering. `doc_values` can be
-disabled on fields that are neither used for sorting nor for aggregations.
-Beware that this decision should not be made lightly since these parameters
-cannot be changed on a live index, so you would have to reindex if you realize
-that you need `norms` or `doc_values`.
-
@@ -326,7 +326,18 @@ Which yields:
 // TESTRESPONSE[s/"took": 12/"took" : $body.took/]
 // TESTRESPONSE[s/OzrdjxNtQGaqs4DmioFw9A/$body.hits.hits.0._node/]
 
-You might have noticed that a significant part of the script depends on
+WARNING: While scripted similarities provide a lot of flexibility, there is
+a set of rules that they need to satisfy. Failing to do so could make
+Elasticsearch silently return wrong top hits or fail with internal errors at
+search time:
+
+ - Returned scores must be positive.
+ - All other variables remaining equal, scores must not decrease when
+   `doc.freq` increases.
+ - All other variables remaining equal, scores must not increase when
+   `doc.length` increases.
+
+You might have noticed that a significant part of the above script depends on
 statistics that are the same for every document. It is possible to make the
 above slightly more efficient by providing a `weight_script` which will
 compute the document-independent part of the score and will be available
@@ -491,7 +502,6 @@ GET /index/_search?explain=true
 
 ////////////////////
 
-
 Type name: `scripted`
 
 [float]
 
@@ -135,6 +135,6 @@ PUT my_index/_doc/1
 }
 --------------------------------------------------
 // CONSOLE
-<1> The `my_float` field is added as a <<number,`double`>> field.
+<1> The `my_float` field is added as a <<number,`float`>> field.
 <2> The `my_integer` field is added as a <<number,`long`>> field.
 
@@ -46,11 +46,22 @@ name as an existing template, it will replace the old version.
 [[match-mapping-type]]
 ==== `match_mapping_type`
 
-The `match_mapping_type` matches on the datatype detected by
-<<dynamic-field-mapping,dynamic field mapping>>, in other words, the datatype
-that Elasticsearch thinks the field should have.  Only the following datatypes
-can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
-`string`.  It also accepts `*` to match all datatypes.
+The `match_mapping_type` is the datatype detected by the JSON parser. Since
+JSON does not distinguish a `long` from an `integer` or a `double` from
+a `float`, the parser always chooses the wider datatype, i.e. `long` for integers
+and `double` for floating-point numbers.
+
+The following datatypes may be automatically detected:
+
+ - `boolean` when `true` or `false` are encountered.
+ - `date` when <<date-detection,date detection>> is enabled and a string is
+   found that matches any of the configured date formats.
+ - `double` for numbers with a decimal part.
+ - `long` for numbers without a decimal part.
+ - `object` for objects, also called hashes.
+ - `string` for character strings.
+
+`*` may also be used in order to match all datatypes.
 
 For example, if we wanted to map all integer fields as `integer` instead of
 `long`, and all `string` fields as both `text` and `keyword`, we
 
@@ -235,26 +235,10 @@ states that:
 * `news` must not be present
 * `quick` and `brown` are optional -- their presence increases the relevance
 
-The familiar operators `AND`, `OR` and `NOT` (also written `&&`, `||` and `!`)
-are also supported.  However, the effects of these operators can be more
-complicated than is obvious at first glance.  `NOT` takes precedence over
-`AND`, which takes precedence over `OR`.  While the `+` and `-` only affect
-the term to the right of the operator, `AND` and `OR` can affect the terms to
-the left and right.
-
-****
-Rewriting the above query using `AND`, `OR` and `NOT` demonstrates the
-complexity:
-
-`quick OR brown AND fox AND NOT news`::
-
-This is incorrect, because `brown` is now a required term.
-
-`(quick OR brown) AND fox AND NOT news`::
-
-This is incorrect because at least one of `quick` or `brown` is now required
-and the search for those terms would be scored differently from the original
-query.
+The familiar boolean operators `AND`, `OR` and `NOT` (also written `&&`, `||`
+and `!`) are also supported but beware that they do not honor the usual
+precedence rules, so parentheses should be used whenever multiple operators are
+used together. For instance the previous query could be rewritten as:
 
 `((quick AND fox) OR (brown AND fox) OR fox) AND NOT news`::
 
@@ -272,7 +256,6 @@ would look like this:
         }
     }
 
-****
 
 ===== Grouping
 
 
@@ -47,7 +47,7 @@ public NumericDoubleValues getField(final int ordinal, LeafReaderContext ctx) th
             if (ordinal > names.length) {
                 throw new IndexOutOfBoundsException("ValuesSource array index " + ordinal + " out of bounds");
             }
-            return multiValueMode.select(values[ordinal].doubleValues(ctx), Double.NEGATIVE_INFINITY);
+            return multiValueMode.select(values[ordinal].doubleValues(ctx));
         }
     }
 
 
@@ -54,7 +54,7 @@ class DateMethodValueSource extends FieldDataValueSource {
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
         final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
             @Override
             public double doubleVal(int docId) throws IOException {
 
@@ -56,7 +56,7 @@ class DateObjectValueSource extends FieldDataValueSource {
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
         MutableDateTime joda = new MutableDateTime(0, DateTimeZone.UTC);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
             @Override
             public double doubleVal(int docId) throws IOException {
 
@@ -68,7 +68,7 @@ public int hashCode() {
     @SuppressWarnings("rawtypes") // ValueSource uses a rawtype
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
           @Override
           public double doubleVal(int doc) throws IOException {
Original file line number	Diff line number	Diff line change
`@@ -135,6 +135,6 @@ PUT my_index/_doc/1`
`135`	`135`	`}`
`136`	`136`	`--------------------------------------------------`
`137`	`137`	`// CONSOLE`
`138`		-<1> The `my_float` field is added as a <<number,`double`>> field.
	`138`	+<1> The `my_float` field is added as a <<number,`float`>> field.
`139`	`139`	<2> The `my_integer` field is added as a <<number,`long`>> field.
`140`	`140`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ public NumericDoubleValues getField(final int ordinal, LeafReaderContext ctx) th`
`47`	`47`	`if (ordinal > names.length) {`
`48`	`48`	`throw new IndexOutOfBoundsException("ValuesSource array index " + ordinal + " out of bounds");`
`49`	`49`	`}`
`50`		`- return multiValueMode.select(values[ordinal].doubleValues(ctx), Double.NEGATIVE_INFINITY);`
	`50`	`+ return multiValueMode.select(values[ordinal].doubleValues(ctx));`
`51`	`51`	`}`
`52`	`52`	`}`
`53`	`53`