spring-projects · sobychacko · Oct 7, 2024
diff --git a/...-gemini/src/main/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModel.java b/...-gemini/src/main/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModel.java
@@ -37,6 +37,7 @@
 import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.model.Generation;
+import org.springframework.ai.chat.model.MessageAggregator;
 import org.springframework.ai.chat.observation.ChatModelObservationContext;
 import org.springframework.ai.chat.observation.ChatModelObservationConvention;
 import org.springframework.ai.chat.observation.ChatModelObservationDocumentation;
@@ -59,7 +60,9 @@
 import org.springframework.util.CollectionUtils;
 import org.springframework.util.StringUtils;
 
+import io.micrometer.observation.Observation;
 import io.micrometer.observation.ObservationRegistry;
+import io.micrometer.observation.contextpropagation.ObservationThreadLocalAccessor;
 import reactor.core.publisher.Flux;
 
 import java.util.ArrayList;
@@ -101,7 +104,7 @@ public class VertexAiGeminiChatModel extends AbstractToolCallSupport implements
 	/**
 	 * Conventions to use for generating observations.
 	 */
-	private final ChatModelObservationConvention observationConvention = DEFAULT_OBSERVATION_CONVENTION;
+	private ChatModelObservationConvention observationConvention = DEFAULT_OBSERVATION_CONVENTION;
 
 	public enum GeminiMessageType {
 
@@ -242,38 +245,58 @@ public ChatResponse call(Prompt prompt) {
 
 	@Override
 	public Flux<ChatResponse> stream(Prompt prompt) {
-		try {
-
+		return Flux.deferContextual(contextView -> {
 			VertexAiGeminiChatOptions vertexAiGeminiChatOptions = vertexAiGeminiChatOptions(prompt);
-			var request = createGeminiRequest(prompt, vertexAiGeminiChatOptions);

-			ResponseStream<GenerateContentResponse> responseStream = request.model
-				.generateContentStream(request.contents);
-
-			return Flux.fromStream(responseStream.stream()).switchMap(response -> {
-
-				List<Generation> generations = response.getCandidatesList()
-					.stream()
-					.map(this::responseCandiateToGeneration)
-					.flatMap(List::stream)
-					.toList();
+			ChatModelObservationContext observationContext = ChatModelObservationContext.builder()
+				.prompt(prompt)
+				.provider(VertexAiGeminiConstants.PROVIDER_NAME)
+				.requestOptions(vertexAiGeminiChatOptions)
+				.build();

-				ChatResponse chatResponse = new ChatResponse(generations, toChatResponseMetadata(response));
+			Observation observation = ChatModelObservationDocumentation.CHAT_MODEL_OPERATION.observation(
+					this.observationConvention, DEFAULT_OBSERVATION_CONVENTION, () -> observationContext,
+					this.observationRegistry);

-				if (!isProxyToolCalls(prompt, this.defaultOptions) && isToolCall(chatResponse,
-						Set.of(FinishReason.STOP.name(), FinishReason.FINISH_REASON_UNSPECIFIED.name()))) {
-					var toolCallConversation = handleToolCalls(prompt, chatResponse);
-					// Recursively call the stream method with the tool call message
-					// conversation that contains the call responses.
-					return this.stream(new Prompt(toolCallConversation, prompt.getOptions()));
-				}
+			observation.parentObservation(contextView.getOrDefault(ObservationThreadLocalAccessor.KEY, null)).start();

-				return Flux.just(chatResponse);
-			});
-		}
-		catch (Exception e) {
-			throw new RuntimeException("Failed to generate content", e);
-		}
+			try {
+				var request = createGeminiRequest(prompt, vertexAiGeminiChatOptions);
+				ResponseStream<GenerateContentResponse> responseStream = request.model
+					.generateContentStream(request.contents);
+
+				Flux<ChatResponse> chatResponseFlux = Flux.fromStream(responseStream.stream()).switchMap(response -> {
+					List<Generation> generations = response.getCandidatesList()
+						.stream()
+						.map(this::responseCandiateToGeneration)
+						.flatMap(List::stream)
+						.toList();
+
+					ChatResponse chatResponse = new ChatResponse(generations, toChatResponseMetadata(response));
+					if (!isProxyToolCalls(prompt, this.defaultOptions) && isToolCall(chatResponse,
+							Set.of(FinishReason.STOP.name(), FinishReason.FINISH_REASON_UNSPECIFIED.name()))) {
+						var toolCallConversation = handleToolCalls(prompt, chatResponse);
+						// Recursively call the stream method with the tool call message
+						// conversation that contains the call responses.
+						return this.stream(new Prompt(toolCallConversation, prompt.getOptions()));
+					}
+					return Flux.just(chatResponse);
+				})
+					// Attach the observation to the whole stream (not to each chunk) so it is
+					// stopped exactly once, after the last chunk, an error, or cancellation.
+					.doOnError(observation::error)
+					.doFinally(s -> observation.stop())
+					.contextWrite(ctx -> ctx.put(ObservationThreadLocalAccessor.KEY, observation));
+
+				return new MessageAggregator().aggregate(chatResponseFlux, observationContext::setResponse);
+			}
+			catch (Exception e) {
+				// The stream was never assembled, so nothing downstream will stop the observation.
+				observation.error(e);
+				observation.stop();
+				throw new RuntimeException("Failed to generate content", e);
+			}
+		});
 	}
 
 	protected List<Generation> responseCandiateToGeneration(Candidate candidate) {
@@ -619,4 +642,13 @@ public void destroy() throws Exception {
 		}
 	}
 
+	/**
+	 * Replaces the convention this model uses when generating observations.
+	 * @param observationConvention the convention to use; must not be {@code null}
+	 */
+	public void setObservationConvention(ChatModelObservationConvention observationConvention) {
+		Assert.notNull(observationConvention, "observationConvention cannot be null");
+		this.observationConvention = observationConvention;
+	}
+
 }
diff --git a/.../src/test/java/org/springframework/ai/vertexai/gemini/VertexAiChatModelObservationIT.java b/.../src/test/java/org/springframework/ai/vertexai/gemini/VertexAiChatModelObservationIT.java
@@ -19,6 +19,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 
 import java.util.List;
+import java.util.stream.Collectors;
 
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -41,6 +42,7 @@
 import com.google.cloud.vertexai.VertexAI;
 import io.micrometer.observation.tck.TestObservationRegistry;
 import io.micrometer.observation.tck.TestObservationRegistryAssert;
+import reactor.core.publisher.Flux;
 
 /**
  * @author Soby Chacko
@@ -83,6 +85,38 @@ void observationForChatOperation() {
 		validate(responseMetadata);
 	}
 
+	@Test
+	void observationForStreamingOperation() {
+		// Explicit request options for the streamed call.
+		var requestOptions = VertexAiGeminiChatOptions.builder()
+			.withModel(VertexAiGeminiChatModel.ChatModel.GEMINI_1_5_PRO.getValue())
+			.withTemperature(0.7)
+			.withStopSequences(List.of("this-is-the-end"))
+			.withMaxOutputTokens(2048)
+			.withTopP(1.0)
+			.build();
+
+		// Drain the stream completely before inspecting anything.
+		Flux<ChatResponse> stream = chatModel.stream(new Prompt("Why does a raven look like a desk?", requestOptions));
+		List<ChatResponse> chunks = stream.collectList().block();
+		assertThat(chunks).isNotEmpty().hasSizeGreaterThan(1);
+		int lastIndex = chunks.size() - 1;
+
+		// The concatenated text of every chunk but the last must be non-empty.
+		String joinedText = chunks.stream()
+			.limit(lastIndex)
+			.map(ChatResponse::getResult)
+			.map(generation -> generation.getOutput().getContent())
+			.collect(Collectors.joining());
+		assertThat(joinedText).isNotEmpty();
+
+		// The metadata of the final chunk is what gets validated.
+		ChatResponseMetadata finalMetadata = chunks.get(lastIndex).getMetadata();
+		assertThat(finalMetadata).isNotNull();
+
+		validate(finalMetadata);
+	}
+
 	private void validate(ChatResponseMetadata responseMetadata) {
 		TestObservationRegistryAssert.assertThat(observationRegistry)
 			.doesNotHaveAnyRemainingCurrentObservation()

diff --git a/...org/springframework/ai/autoconfigure/vertexai/gemini/VertexAiGeminiAutoConfiguration.java b/...org/springframework/ai/autoconfigure/vertexai/gemini/VertexAiGeminiAutoConfiguration.java
@@ -19,10 +19,12 @@
 import java.util.List;
 
 import org.springframework.ai.autoconfigure.retry.SpringAiRetryAutoConfiguration;
+import org.springframework.ai.chat.observation.ChatModelObservationConvention;
 import org.springframework.ai.model.function.FunctionCallback;
 import org.springframework.ai.model.function.FunctionCallbackContext;
 import org.springframework.ai.model.function.FunctionCallbackContext.SchemaType;
 import org.springframework.ai.vertexai.gemini.VertexAiGeminiChatModel;
+import org.springframework.beans.factory.ObjectProvider;
 import org.springframework.boot.autoconfigure.AutoConfiguration;
 import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
@@ -38,6 +40,7 @@
 
 import com.google.auth.oauth2.GoogleCredentials;
 import com.google.cloud.vertexai.VertexAI;
+import io.micrometer.observation.ObservationRegistry;
 
 /**
  * Auto-configuration for Vertex AI Gemini Chat.
@@ -86,12 +89,17 @@ public VertexAI vertexAi(VertexAiGeminiConnectionProperties connectionProperties
 	@ConditionalOnProperty(prefix = VertexAiGeminiChatProperties.CONFIG_PREFIX, name = "enabled", havingValue = "true",
 			matchIfMissing = true)
 	public VertexAiGeminiChatModel vertexAiGeminiChat(VertexAI vertexAi, VertexAiGeminiChatProperties chatProperties,
-			List<FunctionCallback> toolFunctionCallbacks, ApplicationContext context, RetryTemplate retryTemplate) {
+			List<FunctionCallback> toolFunctionCallbacks, ApplicationContext context, RetryTemplate retryTemplate,
+			ObjectProvider<ObservationRegistry> observationRegistry,
+			ObjectProvider<ChatModelObservationConvention> observationConvention) {

 		FunctionCallbackContext functionCallbackContext = springAiFunctionManager(context);

-		return new VertexAiGeminiChatModel(vertexAi, chatProperties.getOptions(), functionCallbackContext,
-				toolFunctionCallbacks, retryTemplate);
+		ObservationRegistry registry = observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP);
+		var geminiChatModel = new VertexAiGeminiChatModel(vertexAi, chatProperties.getOptions(),
+				functionCallbackContext, toolFunctionCallbacks, retryTemplate, registry);
+		observationConvention.ifAvailable(geminiChatModel::setObservationConvention);
+		return geminiChatModel;
 	}
 
 	/**