Add offset option to histogram aggregation #9505

Closed · wants to merge 2 commits · showing changes from 1 commit
@@ -6,7 +6,7 @@ It dynamically builds fixed size (a.k.a. interval) buckets over the values. For
that holds a price (numeric), we can configure this aggregation to dynamically build buckets with interval `5`
(in case of price it may represent $5). When the aggregation executes, the price field of every document will be
evaluated and will be rounded down to its closest bucket - for example, if the price is `32` and the bucket size is `5`
then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated withe the key `30`.
then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated with the key `30`.
To make this more formal, here is the rounding function that is used:

[source,java]
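The body of this `[source,java]` block is collapsed in the diff view. As an editorial illustration of the rounding just described (a sketch, not part of this diff), here is a minimal self-contained version; `Math.floorDiv` rounds toward negative infinity, matching the "round down" behaviour above:

[source,java]
--------------------------------------------------
public class IntervalRoundingSketch {
    public static void main(String[] args) {
        long interval = 5;
        long value = 32;
        // round the value down to the closest multiple of the interval
        long bucketKey = Math.floorDiv(value, interval) * interval;
        System.out.println(bucketKey); // prints 30: the document falls into bucket 30
    }
}
--------------------------------------------------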
@@ -326,6 +326,15 @@ Here is an example of what the response could look like:
}
--------------------------------------------------

==== Offset

By default the bucket keys start with 0 and then continue in evenly spaced steps of `interval`, e.g. if the interval is 10 the first buckets
(assuming there is data inside them) will be [0 - 9), [10-19), [20-29). The bucket boundaries can be shifted by using the `offset` option.
Review comment (Contributor): Do you mean [0 - 9] or [0 - 10) instead of [0 - 9) (I'm assuming '[' means including while '(' means excluding?)

Review comment (Member Author): Sure, will correct that.


This can be best illustrated with an example. If there are 10 documents with values ranging from 5 to 14, using interval `10` will result in
two buckets with 5 documents each. If an additional offset `5` is used, there will be only a single bucket [5-14) containing all 10
documents.
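To make the example above concrete, here is a small self-contained Java sketch (illustrative only, not part of this diff) that computes bucket keys for the ten values 5..14 with interval `10`, with and without an offset of `5`:

[source,java]
--------------------------------------------------
public class OffsetSketch {
    // shift the value by the offset, round down to the interval, then shift back
    static long bucketKey(long value, long interval, long offset) {
        return Math.floorDiv(value - offset, interval) * interval + offset;
    }

    public static void main(String[] args) {
        for (long value = 5; value <= 14; value++) {
            System.out.println(value + ": no offset -> " + bucketKey(value, 10, 0)
                    + ", offset 5 -> " + bucketKey(value, 10, 5));
        }
        // without the offset the values split across buckets 0 and 10 (5 docs each);
        // with offset 5 all ten values land in the single bucket starting at 5
    }
}
--------------------------------------------------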

==== Response Format

By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash
@@ -35,8 +35,7 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
private Long minDocCount;
private Long extendedBoundsMin;
private Long extendedBoundsMax;
private Long preOffset;
private Long postOffset;
private Long offset;

/**
* Constructs a new histogram aggregation builder.
@@ -92,18 +91,10 @@ public HistogramBuilder extendedBounds(Long min, Long max) {
}

/**
* Set the offset to apply prior to computing buckets.
* Set the offset to apply to shift bucket boundaries.
*/
public HistogramBuilder preOffset(long preOffset) {
this.preOffset = preOffset;
return this;
}

/**
* Set the offset to apply after having computed buckets.
*/
public HistogramBuilder postOffset(long postOffset) {
this.postOffset = postOffset;
public HistogramBuilder offset(long offset) {
this.offset = offset;
return this;
}

@@ -119,15 +110,10 @@ protected XContentBuilder doInternalXContent(XContentBuilder builder, Params par
order.toXContent(builder, params);
}

if (preOffset != null) {
builder.field("pre_offset", preOffset);
}

if (postOffset != null) {
builder.field("post_offset", postOffset);
if (offset != null) {
builder.field("offset", offset);
}


if (minDocCount != null) {
builder.field("min_doc_count", minDocCount);
}
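For context, the new builder method is used the same way the tests below use it; a minimal sketch (the index name "idx" and field name "price" are placeholders, and client setup is assumed):

[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;

import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;

public class OffsetUsageSketch {
    // builds a histogram over "price" with interval 10 and bucket boundaries shifted by 5
    static SearchResponse priceHistogram(Client client) {
        return client.prepareSearch("idx")
                .addAggregation(histogram("histo").field("price").interval(10).offset(5))
                .execute().actionGet();
    }
}
--------------------------------------------------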
@@ -56,8 +56,7 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se
InternalOrder order = (InternalOrder) InternalOrder.KEY_ASC;
long interval = -1;
ExtendedBounds extendedBounds = null;
long preOffset = 0;
long postOffset = 0;
long offset = 0;

XContentParser.Token token;
String currentFieldName = null;
@@ -73,10 +72,8 @@
minDocCount = parser.longValue();
} else if ("keyed".equals(currentFieldName)) {
keyed = parser.booleanValue();
} else if ("pre_offset".equals(currentFieldName) || "preOffset".equals(currentFieldName)) {
preOffset = parser.longValue();
} else if ("post_offset".equals(currentFieldName) || "postOffset".equals(currentFieldName)) {
postOffset = parser.longValue();
} else if ("offset".equals(currentFieldName) || "offset".equals(currentFieldName)) {
Review comment (Contributor): Since it is the same in camel and underscore case, you can only check it once?

Review comment (Member Author): Sure, my mistake.
offset = parser.longValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in aggregation [" + aggregationName + "]: [" + currentFieldName + "].");
}
@@ -121,10 +118,10 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se
if (interval < 0) {
throw new SearchParseException(context, "Missing required field [interval] for histogram aggregation [" + aggregationName + "]");
}

Rounding rounding = new Rounding.Interval(interval);
if (preOffset != 0 || postOffset != 0) {
rounding = new Rounding.PrePostRounding((Rounding.Interval) rounding, preOffset, postOffset);
if (offset != 0) {
rounding = new Rounding.PrePostRounding((Rounding.Interval) rounding, -offset, offset);
}

if (extendedBounds != null) {
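Note how the parser implements the single `offset` option: it wraps the interval rounding in the existing `Rounding.PrePostRounding` with a pre-offset of `-offset` and a post-offset of `+offset`, i.e. the value is shifted down before rounding and the rounded result is shifted back up. A worked example of that arithmetic (illustrative sketch, not part of this diff):

[source,java]
--------------------------------------------------
public class PrePostSketch {
    public static void main(String[] args) {
        long interval = 10, offset = 5, value = 14;
        long shifted = value - offset;                              // pre-offset: 14 -> 9
        long rounded = Math.floorDiv(shifted, interval) * interval; // interval rounding: 9 -> 0
        long bucketStart = rounded + offset;                        // post-offset: 0 -> 5
        System.out.println(bucketStart); // value 14 falls into the bucket starting at 5
    }
}
--------------------------------------------------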
61 changes: 59 additions & 2 deletions src/test/java/org/elasticsearch/common/rounding/RoundingTests.java
@@ -20,25 +20,82 @@
package org.elasticsearch.common.rounding;

import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;

import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;



public class RoundingTests extends ElasticsearchTestCase {

/**
* Simple test case to illustrate how Rounding.Interval works on readable input.
*/
@Test
public void testInterval() {
int interval = 10;
Rounding.Interval rounding = new Rounding.Interval(interval);
int value = 24;
final long key = rounding.roundKey(value);
final long r = rounding.round(value);
String message = "round(" + value + ", interval=" + interval + ") = " + r;
assertEquals(value/interval, key);
assertEquals(value/interval * interval, r);
assertEquals(message, 0, r % interval);
}

@Test
public void testIntervalRandom() {
final long interval = randomIntBetween(1, 100);
Rounding.Interval rounding = new Rounding.Interval(interval);
for (int i = 0; i < 1000; ++i) {
long l = Math.max(randomLong(), Long.MIN_VALUE + interval);
final long key = rounding.roundKey(l);
final long r = rounding.round(l);
String message = "round(" + l + ", interval=" + interval + ") = " + r;
assertEquals(message, 0, r % interval);
assertThat(message, r, lessThanOrEqualTo(l));
assertThat(message, r + interval, greaterThan(l));
assertEquals(message, r, key*interval);
}
}

/**
* Simple test case to illustrate how Rounding.PrePostRounding works on readable input.
* preOffset shifts the input value before rounding (so here 24 -> 31),
* postOffset shifts the rounded value after rounding (here 30 -> 35).
*/
@Test
public void testPrePostRounding() {
int interval = 10;
int value = 24;
int preOffset = 7;
int postOffset = 5;
Rounding.PrePostRounding rounding = new Rounding.PrePostRounding(new Rounding.Interval(interval), preOffset, postOffset);
final long key = rounding.roundKey(value);
final long roundedValue = rounding.round(value);
String message = "round(" + value + ", interval=" + interval + ") = " + roundedValue;
assertEquals(3, key);
assertEquals(35, roundedValue);
assertEquals(message, postOffset, roundedValue % interval);
}

@Test
public void testPrePostRoundingRandom() {
final long interval = randomIntBetween(1, 100);
Rounding.Interval internalRounding = new Rounding.Interval(interval);
final long preRounding = randomIntBetween(-100, 100);
final long postRounding = randomIntBetween(-100, 100);
Rounding.PrePostRounding prePost = new Rounding.PrePostRounding(internalRounding, preRounding, postRounding);
long safetyMargin = Math.abs(interval) + Math.abs(preRounding) + Math.abs(postRounding); // to prevent range overflow / underflow
for (int i = 0; i < 1000; ++i) {
long l = Math.max(randomLong() - safetyMargin, Long.MIN_VALUE + safetyMargin);
final long key = prePost.roundKey(l);
final long r = prePost.round(l);
String message = "round(" + l + ", interval=" + interval + ") = "+ r;
assertEquals(message, internalRounding.round(l+preRounding), r - postRounding);
assertThat(message, r - postRounding, lessThanOrEqualTo(l + preRounding));
assertThat(message, r + interval - postRounding, greaterThan(l + preRounding));
assertEquals(message, r, key*interval + postRounding);
}
}
}
@@ -125,7 +125,6 @@ public void singleValuedField() throws Exception {

assertSearchResponse(response);


Histogram histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
@@ -140,51 +139,71 @@
}
}

@Test
public void singleValuedField_withPreOffset() throws Exception {
long preOffsetMultiplier = randomIntBetween(2, 10);
SearchResponse response = client().prepareSearch("idx")
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).preOffset(preOffsetMultiplier * interval))
public void singleValuedField_withOffset() throws Exception {
int interval1 = 10;
int offset = 5;
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval1).offset(offset))
.execute().actionGet();

assertSearchResponse(response);


// from setup we have between 6 and 20 documents, one document per value from 1 to numDocs in the test field
int expectedNumberOfBuckets = (offset >= (numDocs % interval1 + 1)) ? numValueBuckets : numValueBuckets + 1;
Histogram histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat(buckets.size(), equalTo(numValueBuckets));

for (int i = 0; i < numValueBuckets; ++i) {
Histogram.Bucket bucket = buckets.get(i);
assertThat(bucket, notNullValue());
assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) (i + preOffsetMultiplier) * interval));
assertThat(bucket.getDocCount(), equalTo(valueCounts[i]));
}
assertThat(histo.getBuckets().size(), equalTo(expectedNumberOfBuckets));

// first bucket should start at -5 and contain 4 documents
Histogram.Bucket bucket = histo.getBuckets().get(0);
assertThat(bucket, notNullValue());
assertThat(((Number) bucket.getKey()).longValue(), equalTo(-5L));
assertThat(bucket.getDocCount(), equalTo(4L));

// last bucket should have (numDocs % interval1 + 1) docs
bucket = histo.getBuckets().get(histo.getBuckets().size() - 1);
assertThat(bucket, notNullValue());
assertThat(((Number) bucket.getKey()).longValue(), equalTo(numDocs % interval1 + 5L));
assertThat(bucket.getDocCount(), equalTo((numDocs % interval1) + 1L));
}

/**
* Shift buckets by a random offset between [2..interval]. From setup we have one doc per value from 1..numDocs.
* Special care needs to be taken for expectations on counts in the first and last buckets.
*/
@Test
public void singleValuedField_withPostOffset() throws Exception {
long postOffsetMultiplier = randomIntBetween(2, 10);
SearchResponse response = client().prepareSearch("idx")
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).postOffset(postOffsetMultiplier * interval))
public void singleValuedField_withRandomOffset() throws Exception {
int offset = randomIntBetween(2, interval);
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).offset(offset))
.execute().actionGet();

assertSearchResponse(response);

// shifting by offset >= 2 creates a new extra first bucket holding the values [1, offset-1]
// if offset is >= the number of values in the original last bucket, that effect is canceled out
int expectedNumberOfBuckets = (offset >= (numDocs % interval + 1)) ? numValueBuckets : numValueBuckets + 1;

Histogram histo = response.getAggregations().get("histo");
assertThat(histo, notNullValue());
assertThat(histo.getName(), equalTo("histo"));
List<? extends Bucket> buckets = histo.getBuckets();
assertThat(buckets.size(), equalTo(numValueBuckets));
assertThat(histo.getBuckets().size(), equalTo(expectedNumberOfBuckets));

for (int i = 0; i < numValueBuckets; ++i) {
Histogram.Bucket bucket = buckets.get(i);
int docsCounted = 0;
for (int i = 0; i < expectedNumberOfBuckets; ++i) {
Histogram.Bucket bucket = histo.getBuckets().get(i);
assertThat(bucket, notNullValue());
assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) (i + postOffsetMultiplier) * interval));
assertThat(bucket.getDocCount(), equalTo(valueCounts[i]));
assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) ((i-1) * interval + offset)));
if (i == 0) {
// first bucket
long expectedFirstBucketCount = offset - 1;
assertThat(bucket.getDocCount(), equalTo(expectedFirstBucketCount));
docsCounted += expectedFirstBucketCount;
} else if (i < expectedNumberOfBuckets - 1) {
assertThat(bucket.getDocCount(), equalTo((long) interval));
docsCounted += interval;
} else {
assertThat(bucket.getDocCount(), equalTo((long) numDocs - docsCounted));
}
}
}
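To see why these expectations hold, consider one concrete case (an illustrative sketch with example values inside the test's ranges, not part of this diff): values 1..20, interval 5, offset 3 give buckets starting at -2, 3, 8, 13 and 18, so the first bucket holds offset - 1 = 2 docs, each middle bucket holds interval = 5 docs, and the last holds the remaining 3:

[source,java]
--------------------------------------------------
public class OffsetCountsSketch {
    public static void main(String[] args) {
        int numDocs = 20, interval = 5, offset = 3;
        // first bucket starts at offset - interval = -2; bucket index of a value
        // is its distance from that start divided by the interval
        int[] counts = new int[5];
        for (int value = 1; value <= numDocs; value++) {
            counts[(value - (offset - interval)) / interval]++;
        }
        System.out.println(java.util.Arrays.toString(counts)); // [2, 5, 5, 5, 3]
    }
}
--------------------------------------------------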
