Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support retrieval from multiple feature views with different join keys #2835

Merged
merged 5 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: Support retrieving from multiple feature views
Signed-off-by: Yongheng Lin <yongheng.lin@gmail.com>
  • Loading branch information
yongheng committed Jun 24, 2022
commit 8faefa203f2a1f9198d2f9f4e16cfa03ad7417ec
15 changes: 15 additions & 0 deletions java/serving/src/main/java/feast/serving/registry/Registry.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class Registry {
private Map<String, OnDemandFeatureViewProto.OnDemandFeatureViewSpec>
onDemandFeatureViewNameToSpec;
private final Map<String, FeatureServiceProto.FeatureServiceSpec> featureServiceNameToSpec;
private final Map<String, String> entityNameToJoinKey;

Registry(RegistryProto.Registry registry) {
this.registry = registry;
Expand Down Expand Up @@ -60,6 +61,12 @@ public class Registry {
.collect(
Collectors.toMap(
FeatureServiceProto.FeatureServiceSpec::getName, Function.identity()));
this.entityNameToJoinKey =
registry.getEntitiesList().stream()
.map(EntityProto.Entity::getSpec)
.collect(
Collectors.toMap(
EntityProto.EntitySpecV2::getName, EntityProto.EntitySpecV2::getJoinKey));
}

public RegistryProto.Registry getRegistry() {
Expand Down Expand Up @@ -115,4 +122,12 @@ public FeatureServiceProto.FeatureServiceSpec getFeatureServiceSpec(String name)
}
return spec;
}

/**
 * Looks up the join key registered for the entity with the given name.
 *
 * @param name entity name, as it appears in the entity spec
 * @return the join key recorded for that entity
 * @throws SpecRetrievalException if no entity with this name is present in the registry
 */
public String getEntityJoinKey(String name) {
  final String resolved = entityNameToJoinKey.get(name);
  if (resolved != null) {
    return resolved;
  }
  // A missing mapping means the entity name is unknown to this registry snapshot.
  throw new SpecRetrievalException(String.format("Unable to find entity with name: %s", name));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,8 @@ public Duration getMaxAge(ServingAPIProto.FeatureReferenceV2 featureReference) {
/**
 * Returns the entity names declared by the feature view that the given feature reference
 * resolves to.
 *
 * @param featureReference reference used to look up the feature view spec
 * @return the entities list of the resolved feature view spec
 */
public List<String> getEntitiesList(ServingAPIProto.FeatureReferenceV2 featureReference) {
  final List<String> entityNames = getFeatureViewSpec(featureReference).getEntitiesList();
  return entityNames;
}

/**
 * Resolves an entity name to its join key by delegating to the underlying registry.
 *
 * @param name entity name to resolve
 * @return the join key reported by the registry for that entity
 */
public String getEntityJoinKey(String name) {
  // NOTE(review): the registry lookup presumably throws when the entity is unknown — confirm.
  final String joinKey = registry.getEntityJoinKey(name);
  return joinKey;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@
import feast.serving.registry.RegistryRepository;
import feast.serving.util.Metrics;
import feast.storage.api.retriever.OnlineRetrieverV2;
import io.grpc.Status;
import io.opentracing.Span;
import io.opentracing.Tracer;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
Expand All @@ -51,6 +51,11 @@ public class OnlineServingServiceV2 implements ServingServiceV2 {
private final OnlineTransformationService onlineTransformationService;
private final String project;

// Join key treated specially by retrieveFeatures: when a feature view's entity resolves to this
// join key, DUMMY_ENTITY_VALUE is used instead of looking the key up in the request's entity row.
// NOTE(review): presumably this is the placeholder entity of entity-less feature views — confirm.
public static final String DUMMY_ENTITY_ID = "__dummy_id";
// Raw string wrapped by DUMMY_ENTITY_VALUE.
public static final String DUMMY_ENTITY_VAL = "";
// Value proto (string value DUMMY_ENTITY_VAL) supplied for the DUMMY_ENTITY_ID join key.
public static final ValueProto.Value DUMMY_ENTITY_VALUE =
ValueProto.Value.newBuilder().setStringVal(DUMMY_ENTITY_VAL).build();

public OnlineServingServiceV2(
OnlineRetrieverV2 retriever,
Tracer tracer,
Expand Down Expand Up @@ -103,31 +108,18 @@ public ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures(

List<Map<String, ValueProto.Value>> entityRows = getEntityRows(request);

List<String> entityNames;
if (retrievedFeatureReferences.size() > 0) {
entityNames = this.registryRepository.getEntitiesList(retrievedFeatureReferences.get(0));
} else {
throw new RuntimeException("Requested features list must not be empty");
}

Span storageRetrievalSpan = tracer.buildSpan("storageRetrieval").start();
if (storageRetrievalSpan != null) {
storageRetrievalSpan.setTag("entities", entityRows.size());
storageRetrievalSpan.setTag("features", retrievedFeatureReferences.size());
}

List<List<feast.storage.api.retriever.Feature>> features =
retriever.getOnlineFeatures(entityRows, retrievedFeatureReferences, entityNames);
retrieveFeatures(retrievedFeatureReferences, entityRows);

if (storageRetrievalSpan != null) {
storageRetrievalSpan.finish();
}
if (features.size() != entityRows.size()) {
throw Status.INTERNAL
.withDescription(
"The no. of FeatureRow obtained from OnlineRetriever"
+ "does not match no. of entityRow passed.")
.asRuntimeException();
}

Span postProcessingSpan = tracer.buildSpan("postProcessing").start();

Expand Down Expand Up @@ -255,6 +247,78 @@ private List<Map<String, ValueProto.Value>> getEntityRows(
return entityRows;
}

/**
 * Retrieves the requested features for every entity row, issuing one retriever call per feature
 * view and arranging the results in the same order as {@code featureReferences}.
 *
 * <p>For each feature view, every entity row is narrowed to the join keys of that view's
 * entities before being handed to the retriever.
 *
 * @param featureReferences features to retrieve; must be non-empty and contain no duplicates
 * @param entityRows entity rows of the request, each keyed by join key
 * @return one list per entity row; position k of each inner list holds the feature retrieved for
 *     {@code featureReferences.get(k)}, or {@code null} if the retriever returned no row at that
 *     index
 * @throws RuntimeException if {@code featureReferences} is empty or contains a duplicate
 */
private List<List<feast.storage.api.retriever.Feature>> retrieveFeatures(
List<FeatureReferenceV2> featureReferences, List<Map<String, ValueProto.Value>> entityRows) {
// Reject an empty request up front; the grouping below assumes at least one reference.
if (featureReferences.isEmpty()) {
throw new RuntimeException("Requested features list must not be empty.");
}
// Prepare feature reference to index mapping. This mapping will be used to arrange the
// retrieved features to the same order as in the input.
Map<FeatureReferenceV2, Integer> featureReferenceToIndexMap =
new HashMap<>(featureReferences.size());
for (int i = 0; i < featureReferences.size(); i++) {
FeatureReferenceV2 featureReference = featureReferences.get(i);
// A duplicate reference would map to two output positions, so it is rejected.
if (featureReferenceToIndexMap.containsKey(featureReference)) {
throw new RuntimeException(
String.format(
"Found duplicate features %s:%s.",
featureReference.getFeatureViewName(), featureReference.getFeatureName()));
}
featureReferenceToIndexMap.put(featureReference, i);
}

// Create placeholders for retrieved features: one null-filled slot per (entity row, feature
// reference) pair, so results can be written by index in any order.
List<List<feast.storage.api.retriever.Feature>> features = new ArrayList<>(entityRows.size());
for (int i = 0; i < entityRows.size(); i++) {
List<feast.storage.api.retriever.Feature> featuresPerEntity =
new ArrayList<>(featureReferences.size());
for (int j = 0; j < featureReferences.size(); j++) {
featuresPerEntity.add(null);
}
features.add(featuresPerEntity);
}

// Group feature references by feature view.
Map<String, List<FeatureReferenceV2>> featureViewNameToFeatureReferencesMap =
featureReferences.stream()
Copy link
Collaborator

@pyalex pyalex Jun 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To speed up this part we might want to extract distinct feature views from all feature references. And then group feature views instead.

Copy link
Contributor Author

@yongheng yongheng Jun 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC grouping by join keys results in the same or less groups (therefore same or more efficient) than grouping by feature view. The is because different feature views can have the same join keys. In L286, this.registryRepository.getEntitiesList(featureReference) internally gets feature view spec first, then gets entity names of the feature view spec, then we find join keys for the entity names.

Actually, I grouped by feature view at the beginning. Then I switched to grouping by join keys in the second commit of this PR, as an optimization.

.collect(Collectors.groupingBy(FeatureReferenceV2::getFeatureViewName));

// Retrieve features one feature view at a time.
for (List<FeatureReferenceV2> featureReferencesPerFeatureView :
featureViewNameToFeatureReferencesMap.values()) {
// Every reference in the group names the same feature view, so the first one is enough to
// look up that view's entities.
List<String> entityNames =
this.registryRepository.getEntitiesList(featureReferencesPerFeatureView.get(0));
// Build, for each request row, a row containing only this feature view's join keys.
List<Map<String, ValueProto.Value>> entityRowsPerFeatureView =
new ArrayList<>(entityRows.size());
for (Map<String, ValueProto.Value> entityRow : entityRows) {
// NOTE(review): Collectors.toMap rejects null values, so an entity row that lacks one of
// the join keys makes this collect throw NullPointerException rather than a descriptive
// error; two entities sharing a join key would raise IllegalStateException — confirm
// whether either case needs explicit handling.
Map<String, ValueProto.Value> entityRowPerFeatureView =
entityNames.stream()
.map(this.registryRepository::getEntityJoinKey)
.collect(
Collectors.toMap(
Function.identity(),
joinKey -> {
// The dummy entity has no value in the request; supply the fixed placeholder.
if (joinKey.equals(DUMMY_ENTITY_ID)) {
return DUMMY_ENTITY_VALUE;
}
return entityRow.get(joinKey);
}));
entityRowsPerFeatureView.add(entityRowPerFeatureView);
}
List<List<feast.storage.api.retriever.Feature>> featuresPerFeatureView =
retriever.getOnlineFeatures(
entityRowsPerFeatureView, featureReferencesPerFeatureView, entityNames);
// Copy each retrieved feature into the slot matching its position in the original request.
// NOTE(review): the number of rows returned by the retriever is not checked against
// entityRows.size() here; fewer rows leave null slots, more rows would index past
// `features` — confirm the retriever guarantees one row per entity row.
for (int i = 0; i < featuresPerFeatureView.size(); i++) {
for (int j = 0; j < featureReferencesPerFeatureView.size(); j++) {
int k = featureReferenceToIndexMap.get(featureReferencesPerFeatureView.get(j));
features.get(i).set(k, featuresPerFeatureView.get(i).get(j));
}
}
}

return features;
}

private void populateOnDemandFeatures(
List<FeatureReferenceV2> onDemandFeatureReferences,
List<FeatureReferenceV2> onDemandFeatureSources,
Expand Down