Skip to content

Commit 82b75f9

Browse files
committed
Return distinct items from GetMany and SourceMany (#4353)
This commit fixes a bug where a GetMany or SourceMany API call with a repeated id would return a cartesian product of id and documents. - enumerate only distinct ids when retrieving hits or source from GetMany and SourceMany, so that the same id input to either will return only a single document per target index. - fix a bug in the MultiGetRequestFormatter whereby the document index is removed when a request index is specified, without checking whether the document index matches the request index. Fixes #4342 (cherry picked from commit 8cbc1fe)
1 parent 4dc0e83 commit 82b75f9

File tree

4 files changed

+185
-23
lines changed

4 files changed

+185
-23
lines changed

src/Nest/Document/Multiple/MultiGet/Request/MultiGetRequestJsonConverter.cs renamed to src/Nest/Document/Multiple/MultiGet/Request/MultiGetRequestFormatter.cs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Linq;
34
using Elasticsearch.Net.Utf8Json;
45

@@ -20,12 +21,28 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
2021
return;
2122
}
2223

23-
var docs = value.Documents.Select(d =>
24-
{
25-
if (value.Index != null) d.Index = null;
26-
return d;
27-
})
28-
.ToList();
24+
List<IMultiGetOperation> docs;
25+
26+
// if an index is specified at the request level and all documents have the same index, remove the index
27+
if (value.Index != null)
28+
{
29+
var settings = formatterResolver.GetConnectionSettings();
30+
var resolvedIndex = value.Index.GetString(settings);
31+
docs = value.Documents.Select(d =>
32+
{
33+
if (d.Index == null)
34+
return d;
35+
36+
// TODO: not nice to resolve index for each doc here for comparison, only for it to be resolved later in serialization.
37+
// Might be better to simply remove the flattening logic.
38+
var docIndex = d.Index.GetString(settings);
39+
if (string.Equals(resolvedIndex, docIndex)) d.Index = null;
40+
return d;
41+
})
42+
.ToList();
43+
}
44+
else
45+
docs = value.Documents.ToList();
2946

3047
var flatten = docs.All(p => p.CanBeFlattened);
3148

@@ -41,11 +58,11 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
4158
if (index > 0)
4259
writer.WriteValueSeparator();
4360

44-
var id = docs[index];
61+
var doc = docs[index];
4562
if (flatten)
46-
IdFormatter.Serialize(ref writer, id.Id, formatterResolver);
63+
IdFormatter.Serialize(ref writer, doc.Id, formatterResolver);
4764
else
48-
formatter.Serialize(ref writer, id, formatterResolver);
65+
formatter.Serialize(ref writer, doc, formatterResolver);
4966
}
5067
writer.WriteEndArray();
5168
writer.WriteEndObject();

src/Nest/Document/Multiple/MultiGet/Response/MultiGetHitJsonConverter.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ public MultiGetResponse Deserialize(ref JsonReader reader, IJsonFormatterResolve
3636
responses.Add(reader.ReadNextBlockSegment());
3737
break;
3838
}
39+
40+
// skip any other properties that are not "docs"
41+
reader.ReadNextBlock();
3942
}
4043

4144
if (responses.Count == 0)

src/Nest/Document/Multiple/MultiGet/Response/MultiGetResponse.cs

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,28 @@ public FieldValues GetFieldValues<T>(string id) where T : class
2727
return multiHit?.Fields ?? FieldValues.Empty;
2828
}
2929

30+
/// <summary>
31+
/// Retrieves the hits for each distinct id.
32+
/// </summary>
33+
/// <param name="ids">The ids to retrieve source for</param>
34+
/// <typeparam name="T">The document type for the hits to return</typeparam>
35+
/// <returns>An IEnumerable{T} of hits</returns>
3036
public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<string> ids) where T : class
3137
{
32-
var docs = Hits.OfType<IMultiGetHit<T>>();
33-
return from d in docs
34-
join id in ids on d.Id equals id
35-
select d;
38+
HashSet<string> seenIndices = null;
39+
foreach (var id in ids.Distinct())
40+
{
41+
if (seenIndices == null)
42+
seenIndices = new HashSet<string>();
43+
else
44+
seenIndices.Clear();
45+
46+
foreach (var doc in Hits.OfType<IMultiGetHit<T>>())
47+
{
48+
if (string.Equals(doc.Id, id) && seenIndices.Add(doc.Index))
49+
yield return doc;
50+
}
51+
}
3652
}
3753

3854
public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<long> ids) where T : class =>
@@ -46,14 +62,16 @@ public T Source<T>(string id) where T : class
4662

4763
public T Source<T>(long id) where T : class => Source<T>(id.ToString(CultureInfo.InvariantCulture));
4864

49-
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class
50-
{
51-
var docs = Hits.OfType<IMultiGetHit<T>>();
52-
return from d in docs
53-
join id in ids on d.Id equals id
54-
where d.Found
55-
select d.Source;
56-
}
65+
/// <summary>
66+
/// Retrieves the source, if available, for each distinct id.
67+
/// </summary>
68+
/// <param name="ids">The ids to retrieve source for</param>
69+
/// <typeparam name="T">The document type for the hits to return</typeparam>
70+
/// <returns>An IEnumerable{T} of sources</returns>
71+
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class =>
72+
from hit in GetMany<T>(ids)
73+
where hit.Found
74+
select hit.Source;
5775

5876
public IEnumerable<T> SourceMany<T>(IEnumerable<long> ids) where T : class =>
5977
SourceMany<T>(ids.Select(i => i.ToString(CultureInfo.InvariantCulture)));

tests/Tests/Document/Multiple/MultiGet/GetManyApiTests.cs

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Globalization;
34
using System.Linq;
45
using System.Threading.Tasks;
56
using Elastic.Xunit.XunitPlumbing;
@@ -12,12 +13,12 @@
1213

1314
namespace Tests.Document.Multiple.MultiGet
1415
{
15-
public class GetManyApiTests : IClusterFixture<ReadOnlyCluster>
16+
public class GetManyApiTests : IClusterFixture<WritableCluster>
1617
{
1718
private readonly IElasticClient _client;
1819
private readonly IEnumerable<long> _ids = Developer.Developers.Select(d => d.Id).Take(10);
1920

20-
public GetManyApiTests(ReadOnlyCluster cluster) => _client = cluster.Client;
21+
public GetManyApiTests(WritableCluster cluster) => _client = cluster.Client;
2122

2223
[I] public void UsesDefaultIndexAndInferredType()
2324
{
@@ -45,6 +46,129 @@ [I] public async Task UsesDefaultIndexAndInferredTypeAsync()
4546
}
4647
}
4748

49+
[I] public async Task ReturnsDocMatchingDistinctIds()
50+
{
51+
var id = _ids.First();
52+
53+
var response = await _client.GetManyAsync<Developer>(new[] { id, id, id });
54+
response.Count().Should().Be(1);
55+
foreach (var hit in response)
56+
{
57+
hit.Index.Should().NotBeNullOrWhiteSpace();
58+
hit.Id.Should().Be(id.ToString(CultureInfo.InvariantCulture));
59+
hit.Found.Should().BeTrue();
60+
}
61+
}
62+
63+
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndices()
64+
{
65+
var developerIndex = Nest.Indices.Index<Developer>();
66+
var indexName = developerIndex.GetString(_client.ConnectionSettings);
67+
var reindexName = $"{indexName}-getmany-distinctids";
68+
69+
var reindexResponse = _client.ReindexOnServer(r => r
70+
.Source(s => s
71+
.Index(developerIndex)
72+
.Query<Developer>(q => q
73+
.Ids(ids => ids.Values(_ids))
74+
)
75+
)
76+
.Destination(d => d
77+
.Index(reindexName))
78+
.Refresh()
79+
);
80+
81+
if (!reindexResponse.IsValid)
82+
throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");
83+
84+
var id = _ids.First();
85+
86+
var multiGetResponse = _client.MultiGet(s => s
87+
.RequestConfiguration(r => r.ThrowExceptions())
88+
.Get<Developer>(m => m
89+
.Id(id)
90+
.Index(indexName)
91+
)
92+
.Get<Developer>(m => m
93+
.Id(id)
94+
.Index(reindexName)
95+
)
96+
);
97+
98+
var response = multiGetResponse.GetMany<Developer>(new [] { id, id });
99+
100+
response.Count().Should().Be(2);
101+
foreach (var hit in response)
102+
{
103+
hit.Index.Should().NotBeNullOrWhiteSpace();
104+
hit.Id.Should().NotBeNullOrWhiteSpace();
105+
hit.Found.Should().BeTrue();
106+
}
107+
}
108+
109+
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndicesWithRequestLevelIndex()
110+
{
111+
var developerIndex = Nest.Indices.Index<Developer>();
112+
var indexName = developerIndex.GetString(_client.ConnectionSettings);
113+
var reindexName = $"{indexName}-getmany-distinctidsindex";
114+
115+
var reindexResponse = _client.ReindexOnServer(r => r
116+
.Source(s => s
117+
.Index(developerIndex)
118+
.Query<Developer>(q => q
119+
.Ids(ids => ids.Values(_ids))
120+
)
121+
)
122+
.Destination(d => d
123+
.Index(reindexName))
124+
.Refresh()
125+
);
126+
127+
if (!reindexResponse.IsValid)
128+
throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");
129+
130+
var id = _ids.First();
131+
132+
var multiGetResponse = _client.MultiGet(s => s
133+
.Index(indexName)
134+
.RequestConfiguration(r => r.ThrowExceptions())
135+
.Get<Developer>(m => m
136+
.Id(id)
137+
)
138+
.Get<Developer>(m => m
139+
.Id(id)
140+
.Index(reindexName)
141+
)
142+
);
143+
144+
var response = multiGetResponse.GetMany<Developer>(new [] { id, id });
145+
146+
response.Count().Should().Be(2);
147+
var seenIndices = new HashSet<string>(2);
148+
149+
foreach (var hit in response)
150+
{
151+
hit.Index.Should().NotBeNullOrWhiteSpace();
152+
seenIndices.Add(hit.Index);
153+
hit.Id.Should().NotBeNullOrWhiteSpace();
154+
hit.Found.Should().BeTrue();
155+
}
156+
157+
seenIndices.Should().HaveCount(2).And.Contain(new [] { indexName, reindexName });
158+
}
159+
160+
[I] public async Task ReturnsSourceMatchingDistinctIds()
161+
{
162+
var id = _ids.First();
163+
164+
var sources = await _client.SourceManyAsync<Developer>(new[] { id, id, id });
165+
sources.Count().Should().Be(1);
166+
foreach (var hit in sources)
167+
{
168+
hit.Id.Should().Be(id);
169+
}
170+
}
171+
48172
[I] public async Task CanHandleNotFoundResponses()
49173
{
50174
var response = await _client.GetManyAsync<Developer>(_ids.Select(i => i * 100));

0 commit comments

Comments
 (0)