Skip to content

Commit 8fa1dbc

Browse files
committed
Add support for stream usage in Azure OpenAI
Signed-off-by: Andres da Silva Santos <[email protected]>
1 parent 81b715b commit 8fa1dbc

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java

+15
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
259259
fromOptions.getToolCallbacks() != null ? new ArrayList<>(fromOptions.getToolCallbacks()) : null)
260260
.toolNames(fromOptions.getToolNames() != null ? new HashSet<>(fromOptions.getToolNames()) : null)
261261
.responseFormat(fromOptions.getResponseFormat())
262+
.streamUsage(fromOptions.getStreamUsage())
262263
.seed(fromOptions.getSeed())
263264
.logprobs(fromOptions.isLogprobs())
264265
.topLogprobs(fromOptions.getTopLogProbs())
@@ -391,6 +392,14 @@ public void setResponseFormat(AzureOpenAiResponseFormat responseFormat) {
391392
this.responseFormat = responseFormat;
392393
}
393394

395+
public Boolean getStreamUsage() {
396+
return this.streamOptions != null;
397+
}
398+
399+
public void setStreamUsage(Boolean enableStreamUsage) {
400+
this.streamOptions = (enableStreamUsage) ? new ChatCompletionStreamOptions().setIncludeUsage(true) : null;
401+
}
402+
394403
@Override
395404
@JsonIgnore
396405
public Integer getTopK() {
@@ -553,6 +562,12 @@ public Builder responseFormat(AzureOpenAiResponseFormat responseFormat) {
553562
return this;
554563
}
555564

565+
public Builder streamUsage(boolean enableStreamUsage) {
566+
this.options.streamOptions = (enableStreamUsage) ? new ChatCompletionStreamOptions().setIncludeUsage(true)
567+
: null;
568+
return this;
569+
}
570+
556571
public Builder seed(Long seed) {
557572
this.options.seed = seed;
558573
return this;

models/spring-ai-azure-openai/src/test/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptionsTests.java

+9-4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ void testBuilderWithAllFields() {
5656
.topP(0.9)
5757
.user("test-user")
5858
.responseFormat(responseFormat)
59+
.streamUsage(true)
5960
.seed(12345L)
6061
.logprobs(true)
6162
.topLogprobs(5)
@@ -65,11 +66,11 @@ void testBuilderWithAllFields() {
6566

6667
assertThat(options)
6768
.extracting("deploymentName", "frequencyPenalty", "logitBias", "maxTokens", "n", "presencePenalty", "stop",
68-
"temperature", "topP", "user", "responseFormat", "seed", "logprobs", "topLogProbs", "enhancements",
69-
"streamOptions")
69+
"temperature", "topP", "user", "responseFormat", "streamUsage", "seed", "logprobs", "topLogProbs",
70+
"enhancements", "streamOptions")
7071
.containsExactly("test-deployment", 0.5, Map.of("token1", 1, "token2", -1), 200, 2, 0.8,
71-
List.of("stop1", "stop2"), 0.7, 0.9, "test-user", responseFormat, 12345L, true, 5, enhancements,
72-
streamOptions);
72+
List.of("stop1", "stop2"), 0.7, 0.9, "test-user", responseFormat, true, 12345L, true, 5,
73+
enhancements, streamOptions);
7374
}
7475

7576
@Test
@@ -94,6 +95,7 @@ void testCopy() {
9495
.topP(0.9)
9596
.user("test-user")
9697
.responseFormat(responseFormat)
98+
.streamUsage(true)
9799
.seed(12345L)
98100
.logprobs(true)
99101
.topLogprobs(5)
@@ -128,6 +130,7 @@ void testSetters() {
128130
options.setTopP(0.9);
129131
options.setUser("test-user");
130132
options.setResponseFormat(responseFormat);
133+
options.setStreamUsage(true);
131134
options.setSeed(12345L);
132135
options.setLogprobs(true);
133136
options.setTopLogProbs(5);
@@ -148,6 +151,7 @@ void testSetters() {
148151
assertThat(options.getTopP()).isEqualTo(0.9);
149152
assertThat(options.getUser()).isEqualTo("test-user");
150153
assertThat(options.getResponseFormat()).isEqualTo(responseFormat);
154+
assertThat(options.getStreamUsage()).isTrue();
151155
assertThat(options.getSeed()).isEqualTo(12345L);
152156
assertThat(options.isLogprobs()).isTrue();
153157
assertThat(options.getTopLogProbs()).isEqualTo(5);
@@ -171,6 +175,7 @@ void testDefaultValues() {
171175
assertThat(options.getTopP()).isNull();
172176
assertThat(options.getUser()).isNull();
173177
assertThat(options.getResponseFormat()).isNull();
178+
assertThat(options.getStreamUsage()).isFalse();
174179
assertThat(options.getSeed()).isNull();
175180
assertThat(options.isLogprobs()).isNull();
176181
assertThat(options.getTopLogProbs()).isNull();

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ Deployments model name to provide as part of this completions request. | gpt-4o
185185
| spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
186186
| spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
187187
| spring.ai.azure.openai.chat.options.user | An identifier for the caller or end user of the operation. This may be used for tracking or rate-limiting purposes. | -
188+
| spring.ai.azure.openai.chat.options.stream-usage | (For streaming only) When set to true, an additional chunk containing the token usage statistics for the entire request is streamed. The `choices` field for this chunk is an empty array, and all other chunks also include a `usage` field, but with a null value. | false
188189
| spring.ai.azure.openai.chat.options.n | The number of chat completions choices that should be generated for a chat completions response. | -
189190
| spring.ai.azure.openai.chat.options.stop | A collection of textual sequences that will end completions generation. | -
190191
| spring.ai.azure.openai.chat.options.presencePenalty | A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. | -

0 commit comments

Comments
 (0)