Skip to content

Wrong parsing of repeated structured properties #757

Open
@nikita-davydov

Description

@nikita-davydov

Short-reproducible example

from google.cloud import ndb


client = ndb.Client()


class ModelD(ndb.Model):
    field_d = ndb.TextProperty()


class ModelC(ndb.Model):
    field_c_1 = ndb.StringProperty()
    field_c_2 = ndb.TextProperty()
    field_c_3 = ndb.TextProperty()
    field_c_4 = ndb.TextProperty()


class ModelB(ndb.Model):
    field_b_1 = ndb.StringProperty()
    field_b_2 = ndb.StructuredProperty(ModelC)


class ModelA(ndb.Model):
    field_a_1 = ndb.StringProperty()
    field_a_2 = ndb.StructuredProperty(ModelD)
    field_a_3 = ndb.StructuredProperty(ModelB, repeated=True)


def load_data():
    with client.context():
        model_Cs = [
            ModelC(
                field_c_1='value_c_1_1',
                field_c_2='value_c_2_1',
                field_c_3='value_c_3_1',
                field_c_4='value_c_4_1'
            ),
            ModelC(
                field_c_1='value_c_1_2',
                field_c_2='value_c_2_2',
                field_c_3='value_c_3_2',
                field_c_4='value_c_4_2'
            )
        ]

        model_Bs = [
            ModelB(field_b_1='b1', field_b_2=model_Cs[0]),
            ModelB(field_b_1='b2'),  # NULL VALUE FOR field_b_2, This is the issue!
            ModelB(field_b_1='b3', field_b_2=model_Cs[0])
        ]

        model_a = ModelA(field_a_1='a1', field_a_2=ModelD(field_d='d1'), field_a_3=model_Bs)
        model_a.put()
    return model_a.key.id()


def assert_bug():
    id_ = load_data()
    number_of_assertions_errors = 0
    number_of_kind_errors = 0
    for i in range(30):
        try:
            with client.context():
                result = ModelA.get_by_id(id_, use_cache=False, use_global_cache=False)
                result1 = ModelA.get_by_id(id_, use_cache=False, use_global_cache=False)
                assert result == result1
        except AssertionError:
            number_of_assertions_errors += 1
        except ndb.KindError:
            number_of_kind_errors += 1

    print(f'Number of assertions errors {number_of_assertions_errors}')
    print(f'Number of kind errors {number_of_kind_errors}')


if __name__ == '__main__':
    assert_bug()

Example of prints, they are different time to time

Number of assertions errors 13
Number of kind errors 16
Number of assertions errors 11
Number of kind errors 17

The case is that in raw datastore response it returns
e.g.
{'field_a_3.field_b_2': [None], 'field_a_3.field_b_2.field_c_1': ['value_c_1_1', 'value_c_1_2']}

And it confuses parser time to time and it raises KindError, sometimes parser could done his job but it parses not all values, so some of fields of ModelC could be with null values, but in the DB they're not null

Seems like the fix is to sort keys that we receive from google cloud datastore API

Metadata

Metadata

Assignees

No one assigned

    Labels

    api: datastoreIssues related to the googleapis/python-ndb API.priority: p2Moderately-important priority. Fix may not be included in next release.type: bugError or flaw in code with unintended results or allowing sub-optimal usage patterns.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions