Skip to content

Return distinct items from GetMany and SourceMany #4353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Elasticsearch.Net.Utf8Json;

Expand All @@ -20,12 +21,28 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
return;
}

var docs = value.Documents.Select(d =>
{
if (value.Index != null) d.Index = null;
return d;
})
.ToList();
List<IMultiGetOperation> docs;

// if an index is specified at the request level, remove the index from each document whose resolved index matches it
if (value.Index != null)
{
var settings = formatterResolver.GetConnectionSettings();
var resolvedIndex = value.Index.GetString(settings);
docs = value.Documents.Select(d =>
{
if (d.Index == null)
return d;

// TODO: not nice to resolve index for each doc here for comparison, only for it to be resolved later in serialization.
// Might be better to simply remove the flattening logic.
var docIndex = d.Index.GetString(settings);
if (string.Equals(resolvedIndex, docIndex)) d.Index = null;
return d;
})
.ToList();
}
else
docs = value.Documents.ToList();

var flatten = docs.All(p => p.CanBeFlattened);

Expand All @@ -41,11 +58,11 @@ public void Serialize(ref JsonWriter writer, IMultiGetRequest value, IJsonFormat
if (index > 0)
writer.WriteValueSeparator();

var id = docs[index];
var doc = docs[index];
if (flatten)
IdFormatter.Serialize(ref writer, id.Id, formatterResolver);
IdFormatter.Serialize(ref writer, doc.Id, formatterResolver);
else
formatter.Serialize(ref writer, id, formatterResolver);
formatter.Serialize(ref writer, doc, formatterResolver);
}
writer.WriteEndArray();
writer.WriteEndObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ public MultiGetResponse Deserialize(ref JsonReader reader, IJsonFormatterResolve
responses.Add(reader.ReadNextBlockSegment());
break;
}

// skip any other properties that are not "docs"
reader.ReadNextBlock();
}

if (responses.Count == 0)
Expand Down
42 changes: 30 additions & 12 deletions src/Nest/Document/Multiple/MultiGet/Response/MultiGetResponse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,28 @@ public FieldValues GetFieldValues<T>(string id) where T : class
return multiHit?.Fields ?? FieldValues.Empty;
}

/// <summary>
/// Retrieves the hits for each distinct id. When hits with the same id come from more than
/// one index, one hit per index is returned for that id.
/// </summary>
/// <param name="ids">The ids to retrieve hits for; repeated ids are only considered once</param>
/// <typeparam name="T">The document type for the hits to return</typeparam>
/// <returns>An IEnumerable{T} of hits, produced lazily as the result is enumerated</returns>
public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<string> ids) where T : class
{
// NOTE(review): the join-based query below appears to be the previous implementation, shown
// here next to the iterator that replaces it — confirm that only one of the two is kept.
var docs = Hits.OfType<IMultiGetHit<T>>();
return from d in docs
join id in ids on d.Id equals id
select d;
// Indices already yielded for the current id; allocated on first use, cleared for every later id.
HashSet<string> seenIndices = null;
foreach (var id in ids.Distinct())
{
if (seenIndices == null)
seenIndices = new HashSet<string>();
else
seenIndices.Clear();

foreach (var doc in Hits.OfType<IMultiGetHit<T>>())
{
// HashSet.Add returns false for an index that was already seen, so each
// (id, index) combination is yielded at most once.
if (string.Equals(doc.Id, id) && seenIndices.Add(doc.Index))
yield return doc;
}
}
}

public IEnumerable<IMultiGetHit<T>> GetMany<T>(IEnumerable<long> ids) where T : class =>
Expand All @@ -46,14 +62,16 @@ public T Source<T>(string id) where T : class

public T Source<T>(long id) where T : class => Source<T>(id.ToString(CultureInfo.InvariantCulture));

// NOTE(review): two definitions of SourceMany<T>(IEnumerable<string>) appear back to back here.
// The first (join-based) looks like the previous version and the second its replacement —
// confirm that only one of the two is kept.
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class
{
var docs = Hits.OfType<IMultiGetHit<T>>();
// Inner join of hits to ids on the hit id: an id repeated in ids pairs with its hit once
// per repetition. Only found hits contribute a source.
return from d in docs
join id in ids on d.Id equals id
where d.Found
select d.Source;
}
/// <summary>
/// Retrieves the source, if available, for each distinct id.
/// Hits are obtained through GetMany and those that were not found are skipped.
/// </summary>
/// <param name="ids">The ids to retrieve source for</param>
/// <typeparam name="T">The document type of the sources to return</typeparam>
/// <returns>An IEnumerable{T} of sources</returns>
public IEnumerable<T> SourceMany<T>(IEnumerable<string> ids) where T : class =>
from hit in GetMany<T>(ids)
where hit.Found
select hit.Source;

public IEnumerable<T> SourceMany<T>(IEnumerable<long> ids) where T : class =>
SourceMany<T>(ids.Select(i => i.ToString(CultureInfo.InvariantCulture)));
Expand Down
128 changes: 126 additions & 2 deletions tests/Tests/Document/Multiple/MultiGet/GetManyApiTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Threading.Tasks;
using Elastic.Xunit.XunitPlumbing;
Expand All @@ -12,12 +13,12 @@

namespace Tests.Document.Multiple.MultiGet
{
public class GetManyApiTests : IClusterFixture<ReadOnlyCluster>
public class GetManyApiTests : IClusterFixture<WritableCluster>
{
private readonly IElasticClient _client;
private readonly IEnumerable<long> _ids = Developer.Developers.Select(d => d.Id).Take(10);

public GetManyApiTests(ReadOnlyCluster cluster) => _client = cluster.Client;
public GetManyApiTests(WritableCluster cluster) => _client = cluster.Client;

[I] public void UsesDefaultIndexAndInferredType()
{
Expand All @@ -43,6 +44,129 @@ [I] public async Task UsesDefaultIndexAndInferredTypeAsync()
}
}

/// <summary>
/// Requesting the same id several times through GetManyAsync yields a single found hit for that id.
/// </summary>
[I] public async Task ReturnsDocMatchingDistinctIds()
{
	var id = _ids.First();

	var response = await _client.GetManyAsync<Developer>(new[] { id, id, id });

	// Materialize once: the response is an IEnumerable that may be lazily evaluated, so counting
	// it and then iterating it would otherwise evaluate the sequence twice.
	var hits = response.ToList();

	hits.Count.Should().Be(1);
	foreach (var hit in hits)
	{
		hit.Index.Should().NotBeNullOrWhiteSpace();
		hit.Id.Should().Be(id.ToString(CultureInfo.InvariantCulture));
		hit.Found.Should().BeTrue();
	}
}

/// <summary>
/// Copies a handful of developer documents into a second index, multi-gets the same id from
/// both indices, and checks that GetMany returns one found hit per index for that id.
/// </summary>
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndices()
{
	var developerIndex = Nest.Indices.Index<Developer>();
	var indexName = developerIndex.GetString(_client.ConnectionSettings);
	var reindexName = $"{indexName}-getmany-distinctids";

	// Copy the documents under test into a second index so the same id exists in two indices.
	var reindexResponse = _client.ReindexOnServer(r => r
		.Source(s => s
			.Index(developerIndex)
			.Query<Developer>(q => q
				.Ids(ids => ids.Values(_ids))
			)
		)
		.Destination(d => d
			.Index(reindexName))
		.Refresh()
	);

	if (!reindexResponse.IsValid)
		throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");

	var id = _ids.First();

	// Each operation names its index explicitly; no request-level index is set.
	var multiGetResponse = _client.MultiGet(s => s
		.RequestConfiguration(r => r.ThrowExceptions())
		.Get<Developer>(m => m
			.Id(id)
			.Index(indexName)
		)
		.Get<Developer>(m => m
			.Id(id)
			.Index(reindexName)
		)
	);

	// Materialize once: GetMany is enumerated for both the count and the per-hit assertions,
	// and a lazily evaluated sequence would otherwise be evaluated twice.
	var hits = multiGetResponse.GetMany<Developer>(new[] { id, id }).ToList();

	hits.Count.Should().Be(2);
	foreach (var hit in hits)
	{
		hit.Index.Should().NotBeNullOrWhiteSpace();
		hit.Id.Should().NotBeNullOrWhiteSpace();
		hit.Found.Should().BeTrue();
	}
}

/// <summary>
/// Same scenario as the two-index test, but the first operation relies on the request-level
/// index while the second names the copy index explicitly. Verifies that GetMany returns one
/// found hit from each of the two indices.
/// </summary>
[I] public void ReturnsDocsMatchingDistinctIdsFromDifferentIndicesWithRequestLevelIndex()
{
	var developerIndex = Nest.Indices.Index<Developer>();
	var indexName = developerIndex.GetString(_client.ConnectionSettings);
	var reindexName = $"{indexName}-getmany-distinctidsindex";

	// Copy the documents under test into a second index so the same id exists in two indices.
	var reindexResponse = _client.ReindexOnServer(r => r
		.Source(s => s
			.Index(developerIndex)
			.Query<Developer>(q => q
				.Ids(ids => ids.Values(_ids))
			)
		)
		.Destination(d => d
			.Index(reindexName))
		.Refresh()
	);

	if (!reindexResponse.IsValid)
		throw new Exception($"problem reindexing documents for integration test: {reindexResponse.DebugInformation}");

	var id = _ids.First();

	// The first operation inherits the request-level index; the second overrides it.
	var multiGetResponse = _client.MultiGet(s => s
		.Index(indexName)
		.RequestConfiguration(r => r.ThrowExceptions())
		.Get<Developer>(m => m
			.Id(id)
		)
		.Get<Developer>(m => m
			.Id(id)
			.Index(reindexName)
		)
	);

	// Materialize once: GetMany is enumerated for both the count and the per-hit assertions,
	// and a lazily evaluated sequence would otherwise be evaluated twice.
	var hits = multiGetResponse.GetMany<Developer>(new[] { id, id }).ToList();

	hits.Count.Should().Be(2);
	var seenIndices = new HashSet<string>(2);

	foreach (var hit in hits)
	{
		hit.Index.Should().NotBeNullOrWhiteSpace();
		seenIndices.Add(hit.Index);
		hit.Id.Should().NotBeNullOrWhiteSpace();
		hit.Found.Should().BeTrue();
	}

	// Both the request-level index and the explicitly named index must be represented.
	seenIndices.Should().HaveCount(2).And.Contain(new [] { indexName, reindexName });
}

/// <summary>
/// Requesting the same id several times through SourceManyAsync yields a single source document.
/// </summary>
[I] public async Task ReturnsSourceMatchingDistinctIds()
{
	var id = _ids.First();

	var response = await _client.SourceManyAsync<Developer>(new[] { id, id, id });

	// Materialize once: the response is an IEnumerable that may be lazily evaluated, so counting
	// it and then iterating it would otherwise evaluate the sequence twice.
	var sources = response.ToList();

	sources.Count.Should().Be(1);
	foreach (var source in sources)
	{
		source.Id.Should().Be(id);
	}
}

[I] public async Task CanHandleNotFoundResponses()
{
var response = await _client.GetManyAsync<Developer>(_ids.Select(i => i * 100));
Expand Down