Skip to content

SQL: Add support for histogram field  #74658

Closed as not planned
Closed as not planned
@matriv

Description

@matriv

Following #63289 it would be nice to also support this field type in SQL.
Currently if such a field is present in an index, SELECT * FROM my-index just skips the field.
Using the example of https://www.elastic.co/guide/en/elasticsearch/reference/master/histogram.html#histogram-ex,
SELECT * FROM "my-index-000001" returns:

    my_text    
---------------
histogram_1    
histogram_2   

Where as SELECT my_histogram from "my-index-000001" throws an exception as expected:

{
    "error": {
        "root_cause": [
            {
                "type": "verification_exception",
                "reason": "Found 1 problem\nline 1:8: Cannot use field [my_histogram] with unsupported type [histogram]"
            }
        ],
        "type": "verification_exception",
        "reason": "Found 1 problem\nline 1:8: Cannot use field [my_histogram] with unsupported type [histogram]"
    },
    "status": 400
}
  • Histogram field types are not indexed so they cannot be used in a WHERE filter:
POST /my-index-000001/_search
{
    "size": 1000,
    "query": {
        "term": {
            "my_histogram": {
                "value": 7,
                "boost": 1.0
            }
        }
    },
    "_source": false,
    "fields": [
        {
            "field": "my_histogram"
        },
        {
            "field": "my_text"
        }
    ],
    "sort": [
        {
            "_doc": {
                "order": "asc"
            }
        }
    ]
}

returns:

{
    "error": {
        "root_cause": [
            {
                "type": "query_shard_exception",
                "reason": "failed to create query: [histogram] field do not support searching, use dedicated aggregations instead: [my_histogram]",
                "index_uuid": "ASpPxIayTyq4zA-XS77ZUA",
                "index": "my-index-000001"
            }
        ],
        "type": "search_phase_execution_exception",
        "reason": "all shards failed",
        "phase": "query",
        "grouped": true,
        "failed_shards": [
            {
                "shard": 0,
                "index": "my-index-000001",
                "node": "KFidmtZDSuGMeFwn7qhA_A",
                "reason": {
                    "type": "query_shard_exception",
                    "reason": "failed to create query: [histogram] field do not support searching, use dedicated aggregations instead: [my_histogram]",
                    "index_uuid": "ASpPxIayTyq4zA-XS77ZUA",
                    "index": "my-index-000001",
                    "caused_by": {
                        "type": "illegal_argument_exception",
                        "reason": "[histogram] field do not support searching, use dedicated aggregations instead: [my_histogram]"
                    }
                }
            }
        ]
    },
    "status": 400
}
  • Histogram fields can be used only for aggregations like min,max, etc. and can also by used in composite aggs which are used under the hood for SQL GROUP BY queries:
{
    "size": 0,
    "_source": false,
    "aggregations": {
        "groupby": {
            "composite": {
                "size": 1000,
                "sources": [
                    {
                        "adc46d04": {
                            "terms": {
                                "field": "my_text",
                                "missing_bucket": true,
                                "order": "asc"
                            }
                        }
                    }
                ]
            },
            "aggregations": {
                "f98450ce": {
                    "avg": {
                        "field": "my_histogram"
                    }
                }
            }
        }
    }
}

returns:

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "groupby": {
            "after_key": {
                "adc46d04": "histogram_2"
            },
            "buckets": [
                {
                    "key": {
                        "adc46d04": "histogram_1"
                    },
                    "doc_count": 1,
                    "f98450ce": {
                        "value": 0.32156862745098036
                    }
                },
                {
                    "key": {
                        "adc46d04": "histogram_2"
                    },
                    "doc_count": 1,
                    "f98450ce": {
                        "value": 0.2989583333333333
                    }
                }
            ]
        }
    }
}
  • If histogram field type is to be returned in a simple SELECT my_histogram FROM ... or SELECT * FROM ... then special care should be taken for the (also for JDBC/ODBC) as the search results look like this:
{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": null,
        "hits": [
            {
                "_index": "my-index-000001",
                "_id": "1",
                "_score": null,
                "fields": {
                    "my_histogram": [
                        {
                            "counts": [
                                3,
                                7,
                                23,
                                12,
                                6
                            ],
                            "values": [
                                0.1,
                                0.2,
                                0.3,
                                0.4,
                                0.5
                            ]
                        }
                    ],
                    "my_text": [
                        "histogram_1"
                    ]
                },
                "sort": [
                    0
                ]
            },
            {
                "_index": "my-index-000001",
                "_id": "2",
                "_score": null,
                "fields": {
                    "my_histogram": [
                        {
                            "counts": [
                                8,
                                17,
                                8,
                                7,
                                6,
                                2
                            ],
                            "values": [
                                0.1,
                                0.25,
                                0.35,
                                0.4,
                                0.45,
                                0.5
                            ]
                        }
                    ],
                    "my_text": [
                        "histogram_2"
                    ]
                },
                "sort": [
                    1
                ]
            }
        ]
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions