Fix codegen and some small updates (#395)

* fix openapi yaml and runner.gen.go
* fix streaming
* cancel context if borrow container fails
* fix error message
livepeer · Jan 14, 2025 · 40700f4 · 40700f4
1 parent 1ede01e
commit 40700f4
Show file tree

Hide file tree

Showing 7 changed files with 116 additions and 100 deletions.
diff --git a/runner/app/pipelines/llm.py b/runner/app/pipelines/llm.py
@@ -204,7 +204,7 @@ async def generate(
         input_tokens = len(tokenizer.encode(full_prompt))
         if input_tokens > self.engine_args.max_model_len:
             raise ValueError(
-                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine.engine_args.max_model_len})")
+                f"Input sequence length ({input_tokens}) exceeds maximum allowed ({self.engine_args.max_model_len})")
 
         total_tokens = 0
         current_response = ""

diff --git a/runner/app/routes/llm.py b/runner/app/routes/llm.py
@@ -101,7 +101,9 @@ async def llm(
         logger.error(f"LLM processing error: {str(e)}")
         return JSONResponse(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={"detail": "Internal server error during LLM processing."}
+            content=http_error(
+                "Internal server error during LLM processing."
+            )
         )
 
 

diff --git a/runner/gateway.openapi.yaml b/runner/gateway.openapi.yaml
@@ -525,7 +525,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -826,7 +827,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -876,9 +878,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index

diff --git a/runner/gen_openapi.py b/runner/gen_openapi.py
@@ -123,6 +123,7 @@ def write_openapi(fname: str, entrypoint: str = "runner"):
         description="An application to run AI pipelines",
         routes=app.routes,
         servers=SERVERS,
+        separate_input_output_schemas=False
     )
 
     # Translate OpenAPI schema to 'gateway' side entrypoint if requested.

diff --git a/runner/openapi.yaml b/runner/openapi.yaml
@@ -558,7 +558,8 @@ components:
     AudioResponse:
       properties:
         audio:
-          $ref: '#/components/schemas/MediaURL'
+          allOf:
+          - $ref: '#/components/schemas/MediaURL'
           description: The generated audio.
       type: object
       required:
@@ -918,7 +919,8 @@ components:
     HTTPError:
       properties:
         detail:
-          $ref: '#/components/schemas/APIError'
+          allOf:
+          - $ref: '#/components/schemas/APIError'
           description: Detailed error information.
       type: object
       required:
@@ -1022,9 +1024,11 @@ components:
           title: Finish Reason
           default: ''
         delta:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
         message:
-          $ref: '#/components/schemas/LLMMessage'
+          allOf:
+          - $ref: '#/components/schemas/LLMMessage'
       type: object
       required:
       - index

diff --git a/worker/runner.gen.go b/worker/runner.gen.go
diff --git a/worker/worker.go b/worker/worker.go
@@ -399,12 +399,15 @@ func (w *Worker) LLM(ctx context.Context, req GenLLMJSONRequestBody) (interface{
 	ctx, cancel := context.WithCancel(ctx)
 	c, err := w.borrowContainer(ctx, "llm", *req.Model)
 	if err != nil {
+		cancel()
 		return nil, err
 	}
 	if c == nil {
+		cancel()
 		return nil, errors.New("borrowed container is nil")
 	}
 	if c.Client == nil {
+		cancel()
 		return nil, errors.New("container client is nil")
 	}
 
@@ -781,19 +784,21 @@ func (w *Worker) handleStreamingResponse(ctx context.Context, c *RunnerContainer
 			default:
 				line := scanner.Text()
 				data := strings.TrimPrefix(line, "data: ")
-
+				if data == "" {
+					continue
+				}
 				if data == "[DONE]" {
 					break
 				}
 
-				var llmRes *LLMResponse
-				if err := json.Unmarshal([]byte(data), llmRes); err != nil {
-					slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()))
+				var llmRes LLMResponse
+				if err := json.Unmarshal([]byte(data), &llmRes); err != nil {
+					slog.Error("Error unmarshaling stream data", slog.String("err", err.Error()), slog.String("json", data))
 					continue
 				}
 
 				select {
-				case outputChan <- llmRes:
+				case outputChan <- &llmRes:
 				case <-ctx.Done():
 					return
 				}