Skip to content

Add support for stream usage in Azure OpenAi #2858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
fromOptions.getToolCallbacks() != null ? new ArrayList<>(fromOptions.getToolCallbacks()) : null)
.toolNames(fromOptions.getToolNames() != null ? new HashSet<>(fromOptions.getToolNames()) : null)
.responseFormat(fromOptions.getResponseFormat())
.streamUsage(fromOptions.getStreamUsage())
.seed(fromOptions.getSeed())
.logprobs(fromOptions.isLogprobs())
.topLogprobs(fromOptions.getTopLogProbs())
Expand Down Expand Up @@ -391,6 +392,14 @@ public void setResponseFormat(AzureOpenAiResponseFormat responseFormat) {
this.responseFormat = responseFormat;
}

/**
 * Reports whether stream usage statistics are enabled.
 * <p>
 * Derived flag: considered enabled whenever {@code streamOptions} is non-null.
 * NOTE(review): this conflates "streamOptions present" with "includeUsage=true" —
 * per the PR review, an explicit boolean property may be preferable; TODO confirm.
 * @return {@code true} if {@code streamOptions} is set, {@code false} otherwise (never null)
 */
public Boolean getStreamUsage() {
return this.streamOptions != null;
}

/**
 * Enables or disables stream usage statistics by creating or clearing
 * {@code streamOptions}.
 * <p>
 * NOTE(review): {@code enableStreamUsage} is unboxed in the ternary condition,
 * so passing {@code null} throws a NullPointerException — consider
 * {@code Boolean.TRUE.equals(enableStreamUsage)} as the guard; TODO confirm intent.
 * @param enableStreamUsage when true, sets streamOptions with includeUsage=true; otherwise clears it
 */
public void setStreamUsage(Boolean enableStreamUsage) {
this.streamOptions = (enableStreamUsage) ? new ChatCompletionStreamOptions().setIncludeUsage(true) : null;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need an explicit property to set the enableStreamUsage value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This explicit property needs to drive how the usage is included in AzureOpenAiChatModel. Please check OpenAIChatModel StreamOptions#includeUsage for reference.

}

@Override
@JsonIgnore
public Integer getTopK() {
Expand Down Expand Up @@ -553,6 +562,12 @@ public Builder responseFormat(AzureOpenAiResponseFormat responseFormat) {
return this;
}

/**
 * Enables or disables stream usage statistics on the options being built.
 * @param enableStreamUsage when true, configures streamOptions with includeUsage=true;
 * when false, clears streamOptions
 * @return this builder for chaining
 */
public Builder streamUsage(boolean enableStreamUsage) {
	if (enableStreamUsage) {
		this.options.streamOptions = new ChatCompletionStreamOptions().setIncludeUsage(true);
	}
	else {
		this.options.streamOptions = null;
	}
	return this;
}

public Builder seed(Long seed) {
this.options.seed = seed;
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void testBuilderWithAllFields() {
.topP(0.9)
.user("test-user")
.responseFormat(responseFormat)
.streamUsage(true)
.seed(12345L)
.logprobs(true)
.topLogprobs(5)
Expand All @@ -65,11 +66,11 @@ void testBuilderWithAllFields() {

assertThat(options)
.extracting("deploymentName", "frequencyPenalty", "logitBias", "maxTokens", "n", "presencePenalty", "stop",
"temperature", "topP", "user", "responseFormat", "seed", "logprobs", "topLogProbs", "enhancements",
"streamOptions")
"temperature", "topP", "user", "responseFormat", "streamUsage", "seed", "logprobs", "topLogProbs",
"enhancements", "streamOptions")
.containsExactly("test-deployment", 0.5, Map.of("token1", 1, "token2", -1), 200, 2, 0.8,
List.of("stop1", "stop2"), 0.7, 0.9, "test-user", responseFormat, 12345L, true, 5, enhancements,
streamOptions);
List.of("stop1", "stop2"), 0.7, 0.9, "test-user", responseFormat, true, 12345L, true, 5,
enhancements, streamOptions);
}

@Test
Expand All @@ -94,6 +95,7 @@ void testCopy() {
.topP(0.9)
.user("test-user")
.responseFormat(responseFormat)
.streamUsage(true)
.seed(12345L)
.logprobs(true)
.topLogprobs(5)
Expand Down Expand Up @@ -128,6 +130,7 @@ void testSetters() {
options.setTopP(0.9);
options.setUser("test-user");
options.setResponseFormat(responseFormat);
options.setStreamUsage(true);
options.setSeed(12345L);
options.setLogprobs(true);
options.setTopLogProbs(5);
Expand All @@ -148,6 +151,7 @@ void testSetters() {
assertThat(options.getTopP()).isEqualTo(0.9);
assertThat(options.getUser()).isEqualTo("test-user");
assertThat(options.getResponseFormat()).isEqualTo(responseFormat);
assertThat(options.getStreamUsage()).isTrue();
assertThat(options.getSeed()).isEqualTo(12345L);
assertThat(options.isLogprobs()).isTrue();
assertThat(options.getTopLogProbs()).isEqualTo(5);
Expand All @@ -171,6 +175,7 @@ void testDefaultValues() {
assertThat(options.getTopP()).isNull();
assertThat(options.getUser()).isNull();
assertThat(options.getResponseFormat()).isNull();
assertThat(options.getStreamUsage()).isFalse();
assertThat(options.getSeed()).isNull();
assertThat(options.isLogprobs()).isNull();
assertThat(options.getTopLogProbs()).isNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ Deployments model name to provide as part of this completions request. | gpt-4o
| spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
| spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
| spring.ai.azure.openai.chat.options.user | An identifier for the caller or end user of the operation. This may be used for tracking or rate-limiting purposes. | -
| spring.ai.azure.openai.chat.options.stream-usage | (For streaming only) Set to true to add an additional chunk with token usage statistics for the entire request. The `choices` field for this chunk is an empty array, and all other chunks will also include a `usage` field, but with a null value. | false
| spring.ai.azure.openai.chat.options.n | The number of chat completions choices that should be generated for a chat completions response. | -
| spring.ai.azure.openai.chat.options.stop | A collection of textual sequences that will end completions generation. | -
| spring.ai.azure.openai.chat.options.presencePenalty | A value that influences the probability of generated tokens appearing based on their existing presence in generated text. Positive values will make tokens less likely to appear when they already exist and increase the model's likelihood to output new topics. | -
Expand Down