Skip to content

DSL: copy_to should accept a list of field names #2992

@dbourgesw

Description

@dbourgesw

Our migration from the elasticsearch-dsl package to the elasticsearch package uncovered a regression in the behavior of copy_to, specifically when a field's definition includes a copy into multiple other fields (as opposed to just one other field).

The unit test below illustrates this. It passes on elasticsearch-dsl 18.17.1 and fails on elasticsearch 18.18.0. The document class defines two copy_to target fields: `all` should end up with a copy of `first_name`, `last_name` and `birth_place`, whereas `full_name` should end up with a copy of `first_name` and `last_name`. Three of the four test cases fail because `all` ends up with only the copy of `birth_place`, and `full_name` is not populated at all.

The cause of this is the change introduced in line 283 of elasticsearch.dsl.fields.py, where the copy_to argument is forced into a string (it should be allowed to be a list of strings).

from unittest import TestCase

from elasticsearch.dsl import Document, Text
from elasticsearch.dsl.connections import connections

class Person(Document):
    """Person document whose name and birth-place values are copied into aggregate fields."""

    # Copy targets: stored so their contents can be fetched via `stored_fields`.
    full_name = Text(store=True)
    all = Text(store=True)

    # Both name parts are copied into two target fields at once (list form).
    first_name = Text(copy_to=["full_name", "all"])
    last_name = Text(copy_to=["full_name", "all"])
    # The birth place is copied into a single target field (string form).
    birth_place = Text(copy_to="all")

    class Index:
        name = "people"

class CopyToTests(TestCase):
    """Integration tests for `copy_to` with one and with several target fields.

    Every test talks to the Elasticsearch node configured in `setUp`: the
    `Person` index is initialised, a single "Jane Doe" document is saved, and
    the index is deleted again when the test is over.
    """

    def setUp(self):
        """Initialise the index and store the one document the tests rely on."""
        connections.create_connection(hosts=["http://elasticsearch:9200"])
        Person.init()
        # Registered immediately after init: unittest skips tearDown when
        # setUp raises, but cleanups still run, so a failing save()/exists
        # check below no longer leaves the index behind.
        self.addCleanup(Person._index.delete)
        jane = Person(
            meta={"id": "1"},
            first_name="Jane",
            last_name="Doe",
            birth_place="Springfield",
        )
        jane.save()
        # Refresh so the freshly saved document is visible to searches.
        Person._index.refresh()
        self.assertTrue(Person.exists(id="1"))

    def _assert_match_finds_jane(self, **match):
        """Run a `match` query built from *match* and expect only Jane Doe.

        :param match: a single ``field=text`` pair passed on to the query.
        """
        results = Person.search().query("match", **match).execute()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0].first_name, "Jane")

    def _stored_fields(self):
        """Return the stored `all` / `full_name` values of the only hit.

        :return: mapping of stored field name to its list of values; empty
            when the hit carries no stored fields, so that callers fail with
            a readable assertion instead of a ``KeyError``.
        """
        search = Person.search()
        search = search.update_from_dict({"stored_fields": ["all", "full_name"]})
        results = search.execute()
        self.assertEqual(len(results), 1)
        hit = results.to_dict()["hits"]["hits"][0]
        return hit.get("fields", {})

    def test_query_field_all(self):
        """
        Issue a query into the `all` field, which is populated via copy_to. We should find
        the Jane Doe document
        """
        self._assert_match_finds_jane(all="jane")

    def test_retrieve_field_all(self):
        """
        Inspect the contents of the index, verify that the `all` stored field is populated with the values
        of the `first_name`, `last_name`, and `birth_place` fields.
        """
        fields = self._stored_fields()
        self.assertIn("all", fields)
        self.assertEqual(set(fields["all"]), {"Jane", "Doe", "Springfield"})

    def test_query_field_full_name(self):
        """
        Issue a query into the `full_name` field, which is populated via copy_to. We should find
        the Jane Doe document
        """
        self._assert_match_finds_jane(full_name="jane")

    def test_retrieve_field_full_name(self):
        """
        Inspect the contents of the index, verify that the `full_name` stored field is populated with the values
        of the `first_name` and `last_name` fields.
        """
        fields = self._stored_fields()
        self.assertIn("full_name", fields)
        self.assertEqual(set(fields["full_name"]), {"Jane", "Doe"})

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions