15
15
using Microsoft . Extensions . Logging ;
16
16
using Microsoft . Extensions . Logging . Abstractions ;
17
17
using Microsoft . SemanticKernel . ChatCompletion ;
18
+ using Microsoft . SemanticKernel . Diagnostics ;
18
19
using Microsoft . SemanticKernel . Http ;
19
20
using Microsoft . SemanticKernel . Text ;
20
21
@@ -25,6 +26,8 @@ namespace Microsoft.SemanticKernel.Connectors.MistralAI.Client;
25
26
/// </summary>
26
27
internal sealed class MistralClient
27
28
{
29
+ private const string ModelProvider = "mistralai" ; // Provider name handed to ModelDiagnostics.StartCompletionActivity (with the endpoint and model id) for both the non-streaming and streaming chat completion calls.
30
+
28
31
internal MistralClient (
29
32
string modelId ,
30
33
HttpClient httpClient ,
@@ -56,18 +59,56 @@ internal async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsy
56
59
57
60
for ( int requestIndex = 1 ; ; requestIndex ++ )
58
61
{
59
- using var httpRequestMessage = this . CreatePost ( chatRequest , endpoint , this . _apiKey , stream : false ) ;
60
- var responseData = await this . SendRequestAsync < ChatCompletionResponse > ( httpRequestMessage , cancellationToken ) . ConfigureAwait ( false ) ;
61
- if ( responseData is null || responseData . Choices is null || responseData . Choices . Count == 0 )
62
+ ChatCompletionResponse ? responseData = null ;
63
+ List < ChatMessageContent > responseContent ;
64
+ using ( var activity = ModelDiagnostics . StartCompletionActivity ( this . _endpoint , this . _modelId , ModelProvider , chatHistory , mistralExecutionSettings ) )
62
65
{
63
- throw new KernelException ( "Chat completions not found" ) ;
66
+ try
67
+ {
68
+ using var httpRequestMessage = this . CreatePost ( chatRequest , endpoint , this . _apiKey , stream : false ) ;
69
+ responseData = await this . SendRequestAsync < ChatCompletionResponse > ( httpRequestMessage , cancellationToken ) . ConfigureAwait ( false ) ;
70
+ if ( responseData is null || responseData . Choices is null || responseData . Choices . Count == 0 )
71
+ {
72
+ throw new KernelException ( "Chat completions not found" ) ;
73
+ }
74
+ }
75
+ catch ( Exception ex ) when ( activity is not null )
76
+ {
77
+ activity . SetError ( ex ) ;
78
+
79
+ // Capture available metadata even if the operation failed.
80
+ if ( responseData is not null )
81
+ {
82
+ if ( responseData . Id is string id )
83
+ {
84
+ activity . SetResponseId ( id ) ;
85
+ }
86
+
87
+ if ( responseData . Usage is MistralUsage usage )
88
+ {
89
+ if ( usage . PromptTokens is int promptTokens )
90
+ {
91
+ activity . SetPromptTokenUsage ( promptTokens ) ;
92
+ }
93
+ if ( usage . CompletionTokens is int completionTokens )
94
+ {
95
+ activity . SetCompletionTokenUsage ( completionTokens ) ;
96
+ }
97
+ }
98
+ }
99
+
100
+ throw ;
101
+ }
102
+
103
+ responseContent = this . ToChatMessageContent ( modelId , responseData ) ;
104
+ activity ? . SetCompletionResponse ( responseContent , responseData . Usage ? . PromptTokens , responseData . Usage ? . CompletionTokens ) ;
64
105
}
65
106
66
107
// If we don't want to attempt to invoke any functions, just return the result.
67
108
// Or if we are auto-invoking but we somehow end up with other than 1 choice even though only 1 was requested, similarly bail.
68
109
if ( ! autoInvoke || responseData . Choices . Count != 1 )
69
110
{
70
- return this . ToChatMessageContent ( modelId , responseData ) ;
111
+ return responseContent ;
71
112
}
72
113
73
114
// Get our single result and extract the function call information. If this isn't a function call, or if it is
@@ -78,7 +119,7 @@ internal async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsy
78
119
MistralChatChoice chatChoice = responseData . Choices [ 0 ] ; // TODO Handle multiple choices
79
120
if ( ! chatChoice . IsToolCall )
80
121
{
81
- return this . ToChatMessageContent ( modelId , responseData ) ;
122
+ return responseContent ;
82
123
}
83
124
84
125
if ( this . _logger . IsEnabled ( LogLevel . Debug ) )
@@ -237,35 +278,75 @@ internal async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMes
237
278
toolCalls ? . Clear ( ) ;
238
279
239
280
// Stream the responses
240
- var response = this . StreamChatMessageContentsAsync ( chatHistory , mistralExecutionSettings , chatRequest , modelId , cancellationToken ) ;
241
- string ? streamedRole = null ;
242
- await foreach ( var update in response . ConfigureAwait ( false ) )
281
+ using ( var activity = ModelDiagnostics . StartCompletionActivity ( this . _endpoint , this . _modelId , ModelProvider , chatHistory , mistralExecutionSettings ) )
243
282
{
244
- // If we're intending to invoke function calls, we need to consume that function call information.
245
- if ( autoInvoke )
283
+ // Make the request.
284
+ IAsyncEnumerable < StreamingChatMessageContent > response ;
285
+ try
246
286
{
247
- if ( update . InnerContent is not MistralChatCompletionChunk completionChunk || completionChunk . Choices is null || completionChunk . Choices ? . Count == 0 )
248
- {
249
- continue ;
250
- }
287
+ response = this . StreamChatMessageContentsAsync ( chatHistory , mistralExecutionSettings , chatRequest , modelId , cancellationToken ) ;
288
+ }
289
+ catch ( Exception e ) when ( activity is not null )
290
+ {
291
+ activity . SetError ( e ) ;
292
+ throw ;
293
+ }
251
294
252
- MistralChatCompletionChoice chatChoice = completionChunk ! . Choices ! [ 0 ] ; // TODO Handle multiple choices
253
- streamedRole ??= chatChoice . Delta ! . Role ;
254
- if ( chatChoice . IsToolCall )
295
+ var responseEnumerator = response . ConfigureAwait ( false ) . GetAsyncEnumerator ( ) ;
296
+ List < StreamingKernelContent > ? streamedContents = activity is not null ? [ ] : null ;
297
+ string ? streamedRole = null ;
298
+ try
299
+ {
300
+ while ( true )
255
301
{
256
- // Create a copy of the tool calls to avoid modifying the original list
257
- toolCalls = new List < MistralToolCall > ( chatChoice . ToolCalls ! ) ;
258
-
259
- // Add the original assistant message to the chatRequest; this is required for the service
260
- // to understand the tool call responses. Also add the result message to the caller's chat
261
- // history: if they don't want it, they can remove it, but this makes the data available,
262
- // including metadata like usage.
263
- chatRequest . AddMessage ( new MistralChatMessage ( streamedRole , completionChunk . GetContent ( 0 ) ) { ToolCalls = chatChoice . ToolCalls } ) ;
264
- chatHistory . Add ( this . ToChatMessageContent ( modelId , streamedRole ! , completionChunk , chatChoice ) ) ;
302
+ try
303
+ {
304
+ if ( ! await responseEnumerator . MoveNextAsync ( ) )
305
+ {
306
+ break ;
307
+ }
308
+ }
309
+ catch ( Exception ex ) when ( activity is not null )
310
+ {
311
+ activity . SetError ( ex ) ;
312
+ throw ;
313
+ }
314
+
315
+ StreamingChatMessageContent update = responseEnumerator . Current ;
316
+
317
+ // If we're intending to invoke function calls, we need to consume that function call information.
318
+ if ( autoInvoke )
319
+ {
320
+ if ( update . InnerContent is not MistralChatCompletionChunk completionChunk || completionChunk . Choices is null || completionChunk . Choices ? . Count == 0 )
321
+ {
322
+ continue ;
323
+ }
324
+
325
+ MistralChatCompletionChoice chatChoice = completionChunk ! . Choices ! [ 0 ] ; // TODO Handle multiple choices
326
+ streamedRole ??= chatChoice . Delta ! . Role ;
327
+ if ( chatChoice . IsToolCall )
328
+ {
329
+ // Create a copy of the tool calls to avoid modifying the original list
330
+ toolCalls = new List < MistralToolCall > ( chatChoice . ToolCalls ! ) ;
331
+
332
+ // Add the original assistant message to the chatRequest; this is required for the service
333
+ // to understand the tool call responses. Also add the result message to the caller's chat
334
+ // history: if they don't want it, they can remove it, but this makes the data available,
335
+ // including metadata like usage.
336
+ chatRequest . AddMessage ( new MistralChatMessage ( streamedRole , completionChunk . GetContent ( 0 ) ) { ToolCalls = chatChoice . ToolCalls } ) ;
337
+ chatHistory . Add ( this . ToChatMessageContent ( modelId , streamedRole ! , completionChunk , chatChoice ) ) ;
338
+ }
339
+ }
340
+
341
+ streamedContents ? . Add ( update ) ;
342
+ yield return update ;
265
343
}
266
344
}
267
-
268
- yield return update ;
345
+ finally
346
+ {
347
+ activity ? . EndStreaming ( streamedContents ) ;
348
+ await responseEnumerator . DisposeAsync ( ) ;
349
+ }
269
350
}
270
351
271
352
// If we don't have a function to invoke, we're done.
0 commit comments