-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
Our migration from the elasticsearch-dsl package to the elasticsearch package uncovered a regression in the behavior of copy_to, specifically when a field's definition includes a copy into multiple other fields (as opposed to just one other field).
The unit test below illustrates this. It passes on elasticsearch-dsl 18.17.1 and fails on elasticsearch 18.18.0. The document class defines two copy_to target fields: `all` should end up with a copy of `first_name`, `last_name` and `birth_place`, whereas `full_name` should end up with a copy of `first_name` and `last_name`. Three of the four test cases fail because `all` ends up with only the copy of `birth_place`, and `full_name` is not populated at all.
The cause of this is the change introduced in line 283 of elasticsearch.dsl.fields.py, where the copy_to argument is forced into a string (it should be allowed to be a list of strings).
from unittest import TestCase
from elasticsearch.dsl import Document, Text
from elasticsearch.dsl.connections import connections
class Person(Document):
    """Person document used to exercise ``copy_to`` with one and several targets.

    ``first_name`` and ``last_name`` declare ``copy_to`` as a list of two target
    fields, while ``birth_place`` declares a single target as a plain string.
    """

    # Stored fields named as copy_to targets by the source fields below.
    full_name = Text(store=True)
    all = Text(store=True)

    # Source fields: the two name parts list both targets, the birthplace
    # names only ``all``.
    first_name = Text(copy_to=["full_name", "all"])
    last_name = Text(copy_to=["full_name", "all"])
    birth_place = Text(copy_to="all")

    class Index:
        # Name of the index backing this document.
        name = "people"
class CopyToTests(TestCase):
    """Integration tests for ``copy_to`` declared with one or several targets.

    Every test talks to the Elasticsearch node at ``http://elasticsearch:9200``
    and works on a single ``Person`` document (id ``"1"``) indexed in ``setUp``.
    """

    def setUp(self):
        """Connect, initialise the ``Person`` index and store one document."""
        connections.create_connection(hosts=["http://elasticsearch:9200"])
        Person.init()
        # Register the index deletion as soon as the index has been set up:
        # unittest runs cleanups even when a later setUp step raises, whereas
        # tearDown() is skipped in that case and the index would be left behind.
        self.addCleanup(Person._index.delete)

        jane = Person(
            meta={"id": "1"},
            first_name="Jane",
            last_name="Doe",
            birth_place="Springfield",
        )
        jane.save()
        # Refresh so the freshly saved document is visible to the searches
        # issued by the tests.
        Person._index.refresh()
        self.assertTrue(Person.exists(id="1"))

    def _assert_match_finds_jane(self, field_name):
        """Assert that a ``match`` query for "jane" on *field_name* finds Jane Doe.

        :param field_name: name of the field the ``match`` query is issued on.
        """
        search = Person.search().query("match", **{field_name: "jane"})
        results = search.execute()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].first_name, "Jane")

    def _stored_values(self, field_name):
        """Return the stored values of *field_name* for the only indexed document.

        Requests the ``all`` and ``full_name`` stored fields, asserts that
        exactly one hit comes back and that *field_name* is among its stored
        fields, then returns that field's values as a set.

        :param field_name: stored field whose values are returned.
        """
        search = Person.search().update_from_dict(
            {"stored_fields": ["all", "full_name"]}
        )
        results = search.execute()
        self.assertEqual(len(results), 1)
        hit = results.to_dict()["hits"]["hits"][0]
        # Fall back to an empty mapping so that a hit without any stored
        # fields is reported by assertIn as a test failure instead of
        # surfacing as a KeyError.
        fields = hit.get("fields", {})
        self.assertIn(field_name, fields)
        return set(fields[field_name])

    def test_query_field_all(self):
        """
        Issue a query into the `all` field, which is populated via copy_to. We should find
        the Jane Doe document
        """
        self._assert_match_finds_jane("all")

    def test_retrieve_field_all(self):
        """
        Inspect the contents of the index, verify that the `all` stored field is populated with the values
        of the `first_name`, `last_name`, and `birth_place` fields.
        """
        self.assertEqual(self._stored_values("all"), {"Jane", "Doe", "Springfield"})

    def test_query_field_full_name(self):
        """
        Issue a query into the `full_name` field, which is populated via copy_to. We should find
        the Jane Doe document
        """
        self._assert_match_finds_jane("full_name")

    def test_retrieve_field_full_name(self):
        """
        Inspect the contents of the index, verify that the `full_name` stored field is populated with the values
        of the `first_name` and `last_name` fields.
        """
        self.assertEqual(self._stored_values("full_name"), {"Jane", "Doe"})