Skip to content

Commit

Permalink
Fix codegen and some small updates (#395)
Browse files: browse the repository at this point in the history
* fix openapi yaml and runner.gen.go

* fix streaming (unmarshal stream data into a non-nil LLMResponse value and log the offending JSON)

* cancel context if borrow container fails

* fix error message (read max_model_len from self.engine_args instead of self.engine.engine_args)
  • Loading branch information
ad-astra-video authored Jan 14, 2025
1 parent 1ede01e commit 40700f4
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 100 deletions.
2 changes: 1 addition & 1 deletion runner/app/pipelines/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ async def generate(
input_tokens = len(tokenizer.encode(full_prompt))
if input_tokens > self.engine_args.max_model_len:
raise ValueError(
f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine.engine_args.max_model_len})")
f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine_args.max_model_len})")

total_tokens = 0
current_response = ""
Expand Down
4 changes: 3 additions & 1 deletion runner/app/routes/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ async def llm(
logger.error(f"LLM processing error: {str(e)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"detail": "Internal server error during LLM processing."}
content=http_error(
"Internal server error during LLM processing."
)
)


Expand Down
12 changes: 8 additions & 4 deletions runner/gateway.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,8 @@ components:
AudioResponse:
properties:
audio:
$ref: '#/components/schemas/MediaURL'
allOf:
- $ref: '#/components/schemas/MediaURL'
description: The generated audio.
type: object
required:
Expand Down Expand Up @@ -826,7 +827,8 @@ components:
HTTPError:
properties:
detail:
$ref: '#/components/schemas/APIError'
allOf:
- $ref: '#/components/schemas/APIError'
description: Detailed error information.
type: object
required:
Expand Down Expand Up @@ -876,9 +878,11 @@ components:
title: Finish Reason
default: ''
delta:
$ref: '#/components/schemas/LLMMessage'
allOf:
- $ref: '#/components/schemas/LLMMessage'
message:
$ref: '#/components/schemas/LLMMessage'
allOf:
- $ref: '#/components/schemas/LLMMessage'
type: object
required:
- index
Expand Down
1 change: 1 addition & 0 deletions runner/gen_openapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
description="An application to run AI pipelines",
routes=app.routes,
servers=SERVERS,
separate_input_output_schemas=False
)

# Translate OpenAPI schema to 'gateway' side entrypoint if requested.
Expand Down
12 changes: 8 additions & 4 deletions runner/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ components:
AudioResponse:
properties:
audio:
$ref: '#/components/schemas/MediaURL'
allOf:
- $ref: '#/components/schemas/MediaURL'
description: The generated audio.
type: object
required:
Expand Down Expand Up @@ -918,7 +919,8 @@ components:
HTTPError:
properties:
detail:
$ref: '#/components/schemas/APIError'
allOf:
- $ref: '#/components/schemas/APIError'
description: Detailed error information.
type: object
required:
Expand Down Expand Up @@ -1022,9 +1024,11 @@ components:
title: Finish Reason
default: ''
delta:
$ref: '#/components/schemas/LLMMessage'
allOf:
- $ref: '#/components/schemas/LLMMessage'
message:
$ref: '#/components/schemas/LLMMessage'
allOf:
- $ref: '#/components/schemas/LLMMessage'
type: object
required:
- index
Expand Down
170 changes: 85 additions & 85 deletions worker/runner.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 10 additions & 5 deletions worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,15 @@ func (w *Worker) LLM(ctx context.Context, req GenLLMJSONRequestBody) (interface{
ctx, cancel := context.WithCancel(ctx)
c, err := w.borrowContainer(ctx, "llm", *req.Model)
if err != nil {
cancel()
return nil, err
}
if c == nil {
cancel()
return nil, errors.New("borrowed container is nil")
}
if c.Client == nil {
cancel()
return nil, errors.New("container client is nil")
}

Expand Down Expand Up @@ -781,19 +784,21 @@ func (w *Worker) handleStreamingResponse(ctx context.Context, c *RunnerContainer
default:
line := scanner.Text()
data := strings.TrimPrefix(line, "data: ")

if data == "" {
continue
}
if data == "[DONE]" {
break
}

var llmRes *LLMResponse
if err := json.Unmarshal([]byte(data), llmRes); err != nil {
slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()))
var llmRes LLMResponse
if err := json.Unmarshal([]byte(data), &llmRes); err != nil {
slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()), slog.String("json", data))
continue
}

select {
case outputChan <- llmRes:
case outputChan <- &llmRes:
case <-ctx.Done():
return
}
Expand Down

0 comments on commit 40700f4

Please sign in to comment.