diff --git a/runner/Dockerfile b/runner/Dockerfile index 5d00e2d2..e3605e46 100644 --- a/runner/Dockerfile +++ b/runner/Dockerfile @@ -54,4 +54,6 @@ COPY images/ /app/images COPY bench.py /app/bench.py COPY example_data/ /app/example_data -CMD ["uvicorn", "app.main:app", "--log-config", "app/cfg/uvicorn_logging_config.json", "--host", "0.0.0.0", "--port", "8000"] +EXPOSE 8700 + +CMD ["uvicorn", "app.main:app", "--log-config", "app/cfg/uvicorn_logging_config.json", "--host", "0.0.0.0", "--port", "8700"] diff --git a/runner/app/main.py b/runner/app/main.py index b6441a04..a9d7ecb8 100644 --- a/runner/app/main.py +++ b/runner/app/main.py @@ -5,6 +5,9 @@ from app.routes import health from fastapi import FastAPI from fastapi.routing import APIRoute +from .routes import image_outpainting +from .pipelines.image_outpainting import ImageOutpaintingPipeline +from app.routes.image_outpainting import router as image_router logger = logging.getLogger(__name__) @@ -57,6 +60,8 @@ def load_pipeline(pipeline: str, model_id: str) -> any: case "llm": from app.pipelines.llm import LLMPipeline return LLMPipeline(model_id) + case "image-outpainting": + return ImageOutpaintingPipeline(model_id) case _: raise EnvironmentError( f"{pipeline} is not a valid pipeline for model {model_id}" @@ -94,6 +99,8 @@ def load_route(pipeline: str) -> any: case "llm": from app.routes import llm return llm.router + case "image-outpainting": + return image_router case _: raise EnvironmentError(f"{pipeline} is not a valid pipeline") @@ -113,3 +120,4 @@ def use_route_names_as_operation_ids(app: FastAPI) -> None: app = FastAPI(lifespan=lifespan) +app.include_router(image_router) diff --git a/runner/app/pipelines/image_outpainting.py b/runner/app/pipelines/image_outpainting.py new file mode 100644 index 00000000..e241bcc0 --- /dev/null +++ b/runner/app/pipelines/image_outpainting.py @@ -0,0 +1,54 @@ +from diffusers import AutoPipelineForInpainting +import torch +from PIL import Image +import numpy as np + +class ImageOutpaintingPipeline: + def __init__(self): + self.device = "cuda" if torch.cuda.is_available() else "cpu" + try: + # Use AutoPipelineForInpainting to load ProPainter + self.pipe = AutoPipelineForInpainting.from_pretrained("ruffy369/propainter", torch_dtype=torch.float16).to(self.device) + print("ProPainter model loaded successfully.") + except Exception as e: + print(f"Error loading ProPainter model: {e}") + self.pipe = None + + def __call__( + self, + image: Image.Image, + prompt: str, + negative_prompt: str = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + ): + if self.pipe is None: + print("ProPainter model is not loaded. Cannot perform outpainting.") + return None + + # Prepare the image for outpainting + width, height = image.size + target_size = min(max(width, height) * 2, 1024) # Double the size, but cap at 1024 + new_image = Image.new('RGB', (target_size, target_size), (255, 255, 255)) + new_image.paste(image, ((target_size - width) // 2, (target_size - height) // 2)) + + # Create a mask for outpainting + mask = Image.new('L', (target_size, target_size), 255) + mask.paste(0, ((target_size - width) // 2, (target_size - height) // 2, + (target_size + width) // 2, (target_size + height) // 2)) + + try: + # Generate the outpainted image + output = self.pipe( + prompt=prompt, + image=new_image, + mask_image=mask, + negative_prompt=negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + ).images[0] + + return output + except Exception as e: + print(f"Error during outpainting: {e}") + return None diff --git a/runner/app/routes/image_outpainting.py b/runner/app/routes/image_outpainting.py new file mode 100644 index 00000000..c0ae090e --- /dev/null +++ b/runner/app/routes/image_outpainting.py @@ -0,0 +1,57 @@ +from fastapi import APIRouter, File, UploadFile, Form, HTTPException, Query +from PIL import Image +import io +from ..pipelines.image_outpainting import ImageOutpaintingPipeline +from .util import ImageOutpaintingResponse + + +router = APIRouter() +pipeline = ImageOutpaintingPipeline() + + + +def resize_image(image: Image.Image, max_size: int = 1024) -> Image.Image: + """Resize image while maintaining aspect ratio if it exceeds max_size""" + if max(image.size) > max_size: + image.thumbnail((max_size, max_size)) + return image + +@router.post("/out-paint", response_model=ImageOutpaintingResponse) +async def out_paint( + image: UploadFile = File(...), + prompt: str = Form(...), + negative_prompt: str = Form(None), + num_inference_steps: int = Form(50, ge=1, le=1000), + guidance_scale: float = Form(7.5, ge=0, le=20), +): + if len(prompt) > 1000: + raise HTTPException(status_code=400, detail="Prompt is too long") + if negative_prompt and len(negative_prompt) > 1000: + raise HTTPException(status_code=400, detail="Negative prompt is too long") + + try: + image_content = await image.read() + input_image = resize_image(Image.open(io.BytesIO(image_content)).convert("RGB")) + + output_image = pipeline( + image=input_image, + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + ) + + # Convert the output image to bytes for response + img_byte_arr = io.BytesIO() + output_image.save(img_byte_arr, format='PNG') + img_byte_arr = img_byte_arr.getvalue() + + return ImageOutpaintingResponse( + image=img_byte_arr, + prompt=prompt, + negative_prompt=negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"An error occurred during outpainting: {str(e)}") diff --git a/runner/app/routes/util.py b/runner/app/routes/util.py index 2371c9e1..6976777e 100644 --- a/runner/app/routes/util.py +++ b/runner/app/routes/util.py @@ -181,3 +181,15 @@ def json_str_to_np_array( error_message += f": {e}" raise ValueError(error_message) return None + + +class ImageOutpaintingResponse(BaseModel): + """ + Response model for the image outpainting operation. + Contains the resulting outpainted image and the parameters used in the process.""" + + image: bytes = Field(..., description="The outpainted image in bytes format.") + prompt: str = Field(..., description="The prompt used for outpainting.") + negative_prompt: str = Field(None, description="The negative prompt used for outpainting, if any.") + num_inference_steps: int = Field(..., description="The number of inference steps used.") + guidance_scale: float = Field(..., description="The guidance scale used for outpainting.") diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh index 5a241fdf..6b1c24a3 100755 --- a/runner/dl_checkpoints.sh +++ b/runner/dl_checkpoints.sh @@ -43,6 +43,16 @@ function download_beta_models() { # Download image-to-video models (token-gated). check_hf_auth huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt-1-1 --include "*.fp16.safetensors" "*.json" --cache-dir models ${TOKEN_FLAG:+"$TOKEN_FLAG"} + + # Download ProPainter and stable-outpainting models + printf "\nDownloading outpainting models...\n" + huggingface-cli download ruffy369/propainter --cache-dir models + # Only download stable-outpainting if ProPainter fails + if [ $? -ne 0 ]; then + printf "Failed to download ProPainter model. Downloading stable-outpainting as backup...\n" + huggingface-cli download Brvcket/stable-outpainting-xl-0.1 --cache-dir models + fi + } # Download all models. diff --git a/runner/gateway.openapi.yaml b/runner/gateway.openapi.yaml index a8bba1b1..754c746b 100644 --- a/runner/gateway.openapi.yaml +++ b/runner/gateway.openapi.yaml @@ -2,17 +2,17 @@ openapi: 3.1.0 info: title: Livepeer AI Runner description: An application to run AI pipelines - version: '' + version: "" servers: -- url: https://dream-gateway.livepeer.cloud - description: Livepeer Cloud Community Gateway -- url: https://livepeer.studio/api/beta/generate - description: Livepeer Studio Gateway + - url: https://dream-gateway.livepeer.cloud + description: Livepeer Cloud Community Gateway + - url: https://livepeer.studio/api/beta/generate + description: Livepeer Studio Gateway paths: /text-to-image: post: tags: - - generate + - generate summary: Text To Image description: Generate images from text prompts. operationId: genTextToImage @@ -20,47 +20,47 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/TextToImageParams' + $ref: "#/components/schemas/TextToImageParams" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ImageResponse' + $ref: "#/components/schemas/ImageResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: textToImage /image-to-image: post: tags: - - generate + - generate summary: Image To Image description: Apply image transformations to a provided image. operationId: genImageToImage @@ -68,47 +68,47 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/Body_genImageToImage' + $ref: "#/components/schemas/Body_genImageToImage" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ImageResponse' + $ref: "#/components/schemas/ImageResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: imageToImage /image-to-video: post: tags: - - generate + - generate summary: Image To Video description: Generate a video from a provided image. operationId: genImageToVideo @@ -116,47 +116,47 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/Body_genImageToVideo' + $ref: "#/components/schemas/Body_genImageToVideo" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/VideoResponse' + $ref: "#/components/schemas/VideoResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: imageToVideo /upscale: post: tags: - - generate + - generate summary: Upscale description: Upscale an image by increasing its resolution. operationId: genUpscale @@ -164,47 +164,47 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/Body_genUpscale' + $ref: "#/components/schemas/Body_genUpscale" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/ImageResponse' + $ref: "#/components/schemas/ImageResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: upscale /audio-to-text: post: tags: - - generate + - generate summary: Audio To Text description: Transcribe audio files to text. operationId: genAudioToText @@ -212,53 +212,53 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/Body_genAudioToText' + $ref: "#/components/schemas/Body_genAudioToText" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/TextResponse' + $ref: "#/components/schemas/TextResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '413': + $ref: "#/components/schemas/HTTPError" + "413": description: Request Entity Too Large content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: audioToText /segment-anything-2: post: tags: - - generate + - generate summary: Segment Anything 2 description: Segment objects in an image. operationId: genSegmentAnything2 @@ -266,47 +266,47 @@ paths: content: multipart/form-data: schema: - $ref: '#/components/schemas/Body_genSegmentAnything2' + $ref: "#/components/schemas/Body_genSegmentAnything2" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/MasksResponse' + $ref: "#/components/schemas/MasksResponse" x-speakeasy-name-override: data - '400': + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: segmentAnything2 /llm: post: tags: - - generate + - generate summary: LLM description: Generate text using a language model. operationId: genLLM @@ -314,42 +314,79 @@ paths: content: application/x-www-form-urlencoded: schema: - $ref: '#/components/schemas/Body_genLLM' + $ref: "#/components/schemas/Body_genLLM" required: true responses: - '200': + "200": description: Successful Response content: application/json: schema: - $ref: '#/components/schemas/LLMResponse' - '400': + $ref: "#/components/schemas/LLMResponse" + "400": description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '401': + $ref: "#/components/schemas/HTTPError" + "401": description: Unauthorized content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '500': + $ref: "#/components/schemas/HTTPError" + "500": description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/HTTPError' - '422': + $ref: "#/components/schemas/HTTPError" + "422": description: Validation Error content: application/json: schema: - $ref: '#/components/schemas/HTTPValidationError' + $ref: "#/components/schemas/HTTPValidationError" security: - - HTTPBearer: [] + - HTTPBearer: [] x-speakeasy-name-override: llm + /out-paint: + post: + summary: Outpaint an image + operationId: outPaintImage + tags: + - image outpainting + requestBody: + content: + multipart/form-data: + schema: + type: object + properties: + image: + type: string + format: binary + prompt: + type: string + negative_prompt: + type: string + controlnet_conditioning_scale: + type: number + format: float + num_inference_steps: + type: integer + guidance_scale: + type: number + format: float + required: + - image + - prompt + responses: + "200": + description: Successful response + content: + application/json: + schema: + $ref: "#/components/schemas/ImageOutpaintingResponse" components: schemas: APIError: @@ -360,7 +397,7 @@ components: description: The error message. type: object required: - - msg + - msg title: APIError description: API error response model. Body_genAudioToText: @@ -374,11 +411,11 @@ components: type: string title: Model Id description: Hugging Face model ID used for transcription. - default: '' + default: "" type: object required: - - audio - - model_id + - audio + - model_id title: Body_genAudioToText Body_genImageToImage: properties: @@ -395,42 +432,48 @@ components: type: string title: Model Id description: Hugging Face model ID used for image generation. - default: '' + default: "" loras: type: string title: Loras - description: 'A LoRA (Low-Rank Adaptation) model and its corresponding weight + description: + 'A LoRA (Low-Rank Adaptation) model and its corresponding weight for image generation. Example: { "latent-consistency/lcm-lora-sdxl": 1.0, "nerijs/pixel-art-xl": 1.2}.' - default: '' + default: "" strength: type: number title: Strength - description: Degree of transformation applied to the reference image (0 + description: + Degree of transformation applied to the reference image (0 to 1). default: 0.8 guidance_scale: type: number title: Guidance Scale - description: Encourages model to generate images closely linked to the text + description: + Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality). default: 7.5 image_guidance_scale: type: number title: Image Guidance Scale - description: Degree to which the generated image is pushed towards the initial + description: + Degree to which the generated image is pushed towards the initial image. default: 1.5 negative_prompt: type: string title: Negative Prompt - description: Text prompt(s) to guide what to exclude from image generation. + description: + Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1. - default: '' + default: "" safety_check: type: boolean title: Safety Check - description: Perform a safety check to estimate if generated images could + description: + Perform a safety check to estimate if generated images could be offensive or harmful. default: true seed: @@ -440,7 +483,8 @@ components: num_inference_steps: type: integer title: Num Inference Steps - description: Number of denoising steps. More steps usually lead to higher + description: + Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. default: 100 num_images_per_prompt: @@ -450,9 +494,9 @@ components: default: 1 type: object required: - - prompt - - image - - model_id + - prompt + - image + - model_id title: Body_genImageToImage Body_genImageToVideo: properties: @@ -465,7 +509,7 @@ components: type: string title: Model Id description: Hugging Face model ID used for video generation. - default: '' + default: "" height: type: integer title: Height @@ -484,19 +528,22 @@ components: motion_bucket_id: type: integer title: Motion Bucket Id - description: Used for conditioning the amount of motion for the generation. + description: + Used for conditioning the amount of motion for the generation. The higher the number the more motion will be in the video. default: 127 noise_aug_strength: type: number title: Noise Aug Strength - description: Amount of noise added to the conditioning image. Higher values + description: + Amount of noise added to the conditioning image. Higher values reduce resemblance to the conditioning image and increase motion. default: 0.02 safety_check: type: boolean title: Safety Check - description: Perform a safety check to estimate if generated images could + description: + Perform a safety check to estimate if generated images could be offensive or harmful. default: true seed: @@ -506,13 +553,14 @@ components: num_inference_steps: type: integer title: Num Inference Steps - description: Number of denoising steps. More steps usually lead to higher + description: + Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. default: 25 type: object required: - - image - - model_id + - image + - model_id title: Body_genImageToVideo Body_genLLM: properties: @@ -522,11 +570,11 @@ components: model_id: type: string title: Model Id - default: '' + default: "" system_msg: type: string title: System Msg - default: '' + default: "" temperature: type: number title: Temperature @@ -538,15 +586,15 @@ components: history: type: string title: History - default: '[]' + default: "[]" stream: type: boolean title: Stream default: false type: object required: - - prompt - - model_id + - prompt + - model_id title: Body_genLLM Body_genSegmentAnything2: properties: @@ -559,49 +607,56 @@ components: type: string title: Model Id description: Hugging Face model ID used for image generation. - default: '' + default: "" point_coords: type: string title: Point Coords - description: Nx2 array of point prompts to the model, where each point is + description: + Nx2 array of point prompts to the model, where each point is in (X,Y) in pixels. point_labels: type: string title: Point Labels - description: Labels for the point prompts, where 1 indicates a foreground + description: + Labels for the point prompts, where 1 indicates a foreground point and 0 indicates a background point. box: type: string title: Box - description: A length 4 array given as a box prompt to the model, in XYXY + description: + A length 4 array given as a box prompt to the model, in XYXY format. mask_input: type: string title: Mask Input - description: A low-resolution mask input to the model, typically from a + description: + A low-resolution mask input to the model, typically from a previous prediction iteration, with the form 1xHxW (H=W=256 for SAM). multimask_output: type: boolean title: Multimask Output - description: If true, the model will return three masks for ambiguous input + description: + If true, the model will return three masks for ambiguous input prompts, often producing better masks than a single prediction. default: true return_logits: type: boolean title: Return Logits - description: If true, returns un-thresholded mask logits instead of a binary + description: + If true, returns un-thresholded mask logits instead of a binary mask. default: true normalize_coords: type: boolean title: Normalize Coords - description: If true, the point coordinates will be normalized to the range + description: + If true, the point coordinates will be normalized to the range [0,1], with point_coords expected to be with respect to image dimensions. default: true type: object required: - - image - - model_id + - image + - model_id title: Body_genSegmentAnything2 Body_genUpscale: properties: @@ -618,11 +673,12 @@ components: type: string title: Model Id description: Hugging Face model ID used for upscaled image generation. - default: '' + default: "" safety_check: type: boolean title: Safety Check - description: Perform a safety check to estimate if generated images could + description: + Perform a safety check to estimate if generated images could be offensive or harmful. default: true seed: @@ -632,31 +688,32 @@ components: num_inference_steps: type: integer title: Num Inference Steps - description: Number of denoising steps. More steps usually lead to higher + description: + Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. default: 75 type: object required: - - prompt - - image - - model_id + - prompt + - image + - model_id title: Body_genUpscale HTTPError: properties: detail: allOf: - - $ref: '#/components/schemas/APIError' + - $ref: "#/components/schemas/APIError" description: Detailed error information. type: object required: - - detail + - detail title: HTTPError description: HTTP error response model. HTTPValidationError: properties: detail: items: - $ref: '#/components/schemas/ValidationError' + $ref: "#/components/schemas/ValidationError" type: array title: Detail type: object @@ -665,13 +722,13 @@ components: properties: images: items: - $ref: '#/components/schemas/Media' + $ref: "#/components/schemas/Media" type: array title: Images description: The generated images. type: object required: - - images + - images title: ImageResponse description: Response model for image generation. LLMResponse: @@ -684,8 +741,8 @@ components: title: Tokens Used type: object required: - - response - - tokens_used + - response + - tokens_used title: LLMResponse MasksResponse: properties: @@ -703,9 +760,9 @@ components: description: The raw, unnormalized predictions (logits) for the masks. type: object required: - - masks - - scores - - logits + - masks + - scores + - logits title: MasksResponse description: Response model for object segmentation. Media: @@ -724,9 +781,9 @@ components: description: Whether the media was flagged as NSFW. type: object required: - - url - - seed - - nsfw + - url + - seed + - nsfw title: Media description: A media object containing information about the generated media. TextResponse: @@ -737,14 +794,14 @@ components: description: The generated text. chunks: items: - $ref: '#/components/schemas/chunk' + $ref: "#/components/schemas/chunk" type: array title: Chunks description: The generated text chunks. type: object required: - - text - - chunks + - text + - chunks title: TextResponse description: Response model for text generation. TextToImageParams: @@ -753,18 +810,20 @@ components: type: string title: Model Id description: Hugging Face model ID used for image generation. - default: '' + default: "" loras: type: string title: Loras - description: 'A LoRA (Low-Rank Adaptation) model and its corresponding weight + description: + 'A LoRA (Low-Rank Adaptation) model and its corresponding weight for image generation. Example: { "latent-consistency/lcm-lora-sdxl": 1.0, "nerijs/pixel-art-xl": 1.2}.' - default: '' + default: "" prompt: type: string title: Prompt - description: Text prompt(s) to guide image generation. Separate multiple + description: + Text prompt(s) to guide image generation. Separate multiple prompts with '|' if supported by the model. height: type: integer @@ -779,19 +838,22 @@ components: guidance_scale: type: number title: Guidance Scale - description: Encourages model to generate images closely linked to the text + description: + Encourages model to generate images closely linked to the text prompt (higher values may reduce image quality). default: 7.5 negative_prompt: type: string title: Negative Prompt - description: Text prompt(s) to guide what to exclude from image generation. + description: + Text prompt(s) to guide what to exclude from image generation. Ignored if guidance_scale < 1. - default: '' + default: "" safety_check: type: boolean title: Safety Check - description: Perform a safety check to estimate if generated images could + description: + Perform a safety check to estimate if generated images could be offensive or harmful. default: true seed: @@ -801,7 +863,8 @@ components: num_inference_steps: type: integer title: Num Inference Steps - description: Number of denoising steps. More steps usually lead to higher + description: + Number of denoising steps. More steps usually lead to higher quality images but slower inference. Modulated by strength. default: 50 num_images_per_prompt: @@ -811,16 +874,16 @@ components: default: 1 type: object required: - - prompt - - model_id + - prompt + - model_id title: TextToImageParams ValidationError: properties: loc: items: anyOf: - - type: string - - type: integer + - type: string + - type: integer type: array title: Location msg: @@ -831,21 +894,21 @@ components: title: Error Type type: object required: - - loc - - msg - - type + - loc + - msg + - type title: ValidationError VideoResponse: properties: images: items: - $ref: '#/components/schemas/Media' + $ref: "#/components/schemas/Media" type: array title: Images description: The generated images. type: object required: - - images + - images title: VideoResponse description: Response model for image generation. chunk: @@ -861,10 +924,21 @@ components: description: The text of the chunk. type: object required: - - timestamp - - text + - timestamp + - text title: chunk description: A chunk of text with a timestamp. + ImageOutpaintingResponse: + properties: + image: + type: string + format: binary + title: Image + description: The outpainted image. + type: object + required: + - image + title: ImageOutpaintingResponse securitySchemes: HTTPBearer: type: http diff --git a/runner/gen_openapi.py b/runner/gen_openapi.py index 6f557055..e3697957 100644 --- a/runner/gen_openapi.py +++ b/runner/gen_openapi.py @@ -12,7 +12,8 @@ segment_anything_2, text_to_image, upscale, - llm + llm, + image_outpainting ) from fastapi.openapi.utils import get_openapi import subprocess @@ -125,6 +126,7 @@ def write_openapi(fname: str, entrypoint: str = "runner", version: str = "0.0.0" app.include_router(audio_to_text.router) app.include_router(segment_anything_2.router) app.include_router(llm.router) + app.include_router(image_outpainting.router, tags=["image outpainting"]) logger.info(f"Generating OpenAPI schema for '{entrypoint}' entrypoint...") openapi = get_openapi( diff --git a/runner/requirements.txt b/runner/requirements.txt index 87f72e43..9b0edef3 100644 --- a/runner/requirements.txt +++ b/runner/requirements.txt @@ -6,16 +6,20 @@ pydantic==2.7.2 Pillow==10.3.0 python-multipart==0.0.9 uvicorn==0.30.0 -huggingface_hub==0.23.2 +huggingface_hub[cli]==0.23.2 xformers==0.0.23 triton>=2.1.0 peft==0.11.1 deepcache==0.1.1 safetensors==0.4.3 scipy==1.13.0 -numpy==1.26.4 +numpy==2.1.2 av==12.1.0 -sentencepiece== 0.2.0 +sentencepiece==0.2.0 protobuf==5.27.2 bitsandbytes==0.43.3 psutil==6.0.0 +controlnet_aux==0.0.7 +opencv-python-headless==4.8.0.74 +torch==2.1.1 +torchvision==0.16.1 \ No newline at end of file