LLM Gateway

Image Generation

Generate images using AI models through the chat completions API

Image Generation

LLMGateway supports image generation models through the standard chat completions API. These models can create images based on text prompts and return them as base64-encoded data URLs.

Available Models

You can find all available image generation models on our models page.

Making Requests

Image generation works through the same /v1/chat/completions endpoint as text models. Simply use an image generation model and provide a text prompt describing the image you want to create.

curl -X POST "https://api.llmgateway.io/v1/chat/completions" \
  -H "Authorization: Bearer $LLM_GATEWAY_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-2.5-flash-image-preview",
    "messages": [
      {
        "role": "user",
        "content": "Generate an image of a cute golden retriever puppy playing in a sunny meadow"
      }
    ]
  }'

Image Configuration

You can customize the generated image using the optional image_config parameter:

curl -X POST "https://api.llmgateway.io/v1/chat/completions" \
  -H "Authorization: Bearer $LLM_GATEWAY_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3-pro-image-preview",
    "messages": [
      {
        "role": "user",
        "content": "Generate an image of a mountain landscape at sunset"
      }
    ],
    "image_config": {
      "aspect_ratio": "16:9",
      "image_size": "4K"
    }
  }'

Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| `aspect_ratio` | string | The aspect ratio of the generated image. Examples: `"1:1"`, `"16:9"`, `"4:3"`, `"5:4"` |
| `image_size` | string | The resolution of the generated image. Options: `"1K"` (1024x1024), `"2K"` (2048x2048), `"4K"` (4096x4096) |

The image_config parameter is currently supported for Google's image generation models (e.g., gemini-3-pro-image-preview).

Response Format

Image generation models return responses in the standard chat completions format, with generated images included in the images array within the assistant message:

{
	"id": "chatcmpl-1756234109285",
	"object": "chat.completion",
	"created": 1756234109,
	"model": "gemini-2.5-flash-image-preview",
	"choices": [
		{
			"index": 0,
			"message": {
				"role": "assistant",
				"content": "Here's an image of a cute dog for you: ",
				"images": [
					{
						"type": "image_url",
						"image_url": {
							"url": "data:image/png;base64,<base64_encoded_image_data>"
						}
					}
				]
			},
			"finish_reason": "stop"
		}
	],
	"usage": {
		"prompt_tokens": 8,
		"completion_tokens": 1303,
		"total_tokens": 1311
	}
}

Vision support

You can edit or modify an existing image by combining image generation with vision support: include the image in the messages array alongside your text prompt.

Response Structure

Images Array

The images array contains one or more generated images with the following structure:

  • type: Always "image_url" for generated images
  • image_url.url: A data URL containing the base64-encoded image data (format: data:image/png;base64,<data>)

Content Field

The content field may contain descriptive text about the generated image, depending on the model's behavior.

AI SDK

You can use the AI SDK to generate images with your existing generateText or streamText calls.

Example

/api/chat/route.ts
import { streamText, type UIMessage, convertToModelMessages } from "ai";
import { createLLMGateway } from "@llmgateway/ai-sdk-provider";

/** Shape of the JSON request body POSTed by the chat frontend (AI SDK `useChat` message array). */
interface ChatRequestBody {
	messages: UIMessage[];
}

/**
 * Chat endpoint: forwards the conversation to an LLMGateway image-generation
 * model and streams the response back in the AI SDK UI-message format.
 *
 * Returns a 500 JSON error response if setting up the stream fails.
 */
export async function POST(req: Request) {
	const { messages }: ChatRequestBody = await req.json();

	// Point the AI SDK provider at the LLMGateway API.
	const llmgateway = createLLMGateway({
		apiKey: "llmgateway_api_key",
		baseUrl: "https://api.llmgateway.io/v1",
	});

	try {
		const stream = streamText({
			model: llmgateway.chat("gemini-2.5-flash-image-preview"),
			messages: convertToModelMessages(messages),
		});

		return stream.toUIMessageStreamResponse();
	} catch {
		return new Response(
			JSON.stringify({ error: "LLM Gateway Chat request failed" }),
			{ status: 500 },
		);
	}
}

Then you can render the image in your frontend using the `Image` component from AI Elements.

Here is a full example of how to use the AI SDK to generate images in your frontend:

/app/page.tsx
"use client";

import { useState, useRef } from "react";
import { useChat } from "@ai-sdk/react";
import { parseImagePartToDataUrl } from "@/lib/image-utils";
import {
	PromptInput,
	PromptInputBody,
	PromptInputButton,
	PromptInputSubmit,
	PromptInputTextarea,
	PromptInputToolbar,
} from "@/components/ai-elements/prompt-input";
import {
	Conversation,
	ConversationContent,
} from "@/components/ai-elements/conversation";
import { Image } from "@/components/ai-elements/image";
import { Loader } from "@/components/ai-elements/loader";
import { Message, MessageContent } from "@/components/ai-elements/message";
import { Response } from "@/components/ai-elements/response";

/**
 * Chat UI that renders a conversation with an image-generation model.
 *
 * Assistant messages are split into their text content and any image file
 * parts; images are decoded via `parseImagePartToDataUrl` and rendered with
 * the AI Elements `Image` component. User input is captured through
 * `PromptInput` and sent via the AI SDK `useChat` hook.
 */
export const ChatUI = () => {
	const textareaRef = useRef<HTMLTextAreaElement | null>(null);
	const [text, setText] = useState("");
	// NOTE: `regenerate` was previously destructured here but never used.
	const { messages, status, stop, sendMessage } = useChat();

	return (
		<>
			<div className="flex-1 overflow-y-auto px-4 pb-24">
				<Conversation>
					<ConversationContent>
						{messages.length === 0 ? (
							<div className="mb-6 text-center">
								<h2 className="text-3xl font-semibold tracking-tight">
									How can I help you?
								</h2>
							</div>
						) : (
							messages.map((m, messageIndex) => {
								const isLastMessage = messageIndex === messages.length - 1;

								if (m.role === "assistant") {
									const textContent = m.parts
										.filter((p) => p.type === "text")
										.map((p) => p.text)
										.join("");
									// Collect image attachments: only "file" parts whose media
									// type is an image are rendered as pictures.
									const imageParts = m.parts.filter(
										(p) =>
											p.type === "file" && p.mediaType?.startsWith("image/"),
									);

									return (
										<div key={m.id}>
											{textContent ? <Response>{textContent}</Response> : null}
											{imageParts.length > 0 ? (
												<div className="mt-3 grid grid-cols-1 gap-3 sm:grid-cols-2">
													{imageParts.map((part, idx: number) => {
														const { base64Only, mediaType } =
															parseImagePartToDataUrl(part);

														// Skip parts that could not be decoded.
														if (!base64Only) {
															return null;
														}

														return (
															<Image
																key={idx}
																base64={base64Only}
																mediaType={mediaType}
																alt={part.name || "Generated image"}
															/>
														);
													})}
												</div>
											) : null}
											{isLastMessage &&
												(status === "submitted" || status === "streaming") && (
													<Loader />
												)}
										</div>
									);
								} else {
									return (
										<Message key={m.id} from={m.role}>
											<MessageContent variant="flat">
												{m.parts.map((p, i) => {
													if (p.type === "text") {
														return <div key={i}>{p.text}</div>;
													}
													return null;
												})}
											</MessageContent>
											{isLastMessage &&
												(status === "submitted" || status === "streaming") && (
													<Loader />
												)}
										</Message>
									);
								}
							})
						)}
					</ConversationContent>
				</Conversation>
			</div>
			<div className="sticky bottom-0 left-0 right-0 px-4 pb-[max(env(safe-area-inset-bottom),1rem)] pt-2 bg-gradient-to-t from-background via-background/95 to-transparent backdrop-blur supports-[backdrop-filter]:bg-background/60">
				<PromptInput
					aria-disabled={status === "streaming"}
					onSubmit={async (message) => {
						// Ignore submissions while a response is still streaming.
						if (status === "streaming") {
							return;
						}

						try {
							const textContent = message.text ?? "";
							if (!textContent.trim()) {
								return;
							}

							setText(""); // Clear input immediately

							// `as const` keeps the literal "text" type expected by the
							// AI SDK message part union.
							const parts = [{ type: "text" as const, text: textContent }];

							// sendMessage adds the user message and triggers the API request.
							sendMessage({
								role: "user",
								parts,
							});
						} catch (error) {
							// Surface the failure instead of silently swallowing it.
							console.error("Failed to send chat message", error);
						}
					}}
				>
					<PromptInputBody>
						<PromptInputTextarea
							ref={textareaRef}
							value={text}
							onChange={(e) => setText(e.currentTarget.value)}
							placeholder="Message"
						/>
					</PromptInputBody>
					<PromptInputToolbar>
						<div className="flex items-center gap-2">
							{status === "streaming" ? (
								<PromptInputButton onClick={() => stop()} variant="ghost">
									Stop
								</PromptInputButton>
							) : null}
							<PromptInputSubmit
								status={status === "streaming" ? "streaming" : "ready"}
							/>
						</div>
					</PromptInputToolbar>
				</PromptInput>
			</div>
		</>
	);
};
/lib/image-utils.ts
/**
 * Normalizes a file-like object holding image data into a renderable data URL
 * plus its media type.
 *
 * Behavior:
 * - Reads the media type from `mediaType` or `mime_type`, defaulting to
 *   "image/png".
 * - Passes through existing `data:` URLs unchanged.
 * - Wraps content that looks like bare base64 (whitespace stripped) into a
 *   proper `data:<type>;base64,<data>` URL.
 * - Leaves anything else (e.g. plain http(s) URLs, empty strings) as-is.
 */
export function parseImageFile(file: {
	url?: string;
	mediaType?: string;
	mime_type?: string;
}): { dataUrl: string; mediaType: string } {
	const mediaType = file.mediaType || file.mime_type || "image/png";
	const rawUrl = String(file.url || "");

	// Already a complete data URL — use verbatim.
	if (rawUrl.startsWith("data:")) {
		return { dataUrl: rawUrl, mediaType };
	}

	// Heuristic: if the first 200 chars contain only base64 alphabet and
	// whitespace, treat the whole value as a bare base64 payload.
	const base64Like = /^[A-Za-z0-9+/=\s]+$/.test(rawUrl.slice(0, 200));
	if (base64Like) {
		const cleaned = rawUrl.replace(/\s+/g, "");
		return { dataUrl: `data:${mediaType};base64,${cleaned}`, mediaType };
	}

	// Unknown format (e.g. a remote URL) — return unchanged.
	return { dataUrl: rawUrl, mediaType };
}

/**
 * Returns the base64 payload of a data URL — everything after the first comma.
 * Inputs that are not data URLs, or data URLs with no comma, yield "".
 */
export function extractBase64FromDataUrl(dataUrl: string): string {
	if (!dataUrl.startsWith("data:")) {
		return "";
	}

	const separator = dataUrl.indexOf(",");
	if (separator === -1) {
		// Malformed data URL with no payload section.
		return "";
	}
	return dataUrl.slice(separator + 1);
}

/**
 * Parses an image part (either `image_url` or `file` type) and returns
 * dataUrl, base64Only, and mediaType ready for rendering.
 *
 * Handles error cases gracefully by returning an empty base64Only string
 * when parsing fails, allowing the renderer to skip invalid images.
 */
export function parseImagePartToDataUrl(part: any): {
	dataUrl: string;
	base64Only: string;
	mediaType: string;
} {
	try {
		// Handle image_url parts (chat-completions response format)
		if (part.type === "image_url" && part.image_url?.url) {
			const url: string = part.image_url.url;
			const fallbackMediaType = "image/png"; // Default for image_url parts

			if (url.startsWith("data:")) {
				// Extract the media type from the data URL header. The pattern is
				// anchored to the start and excludes both ";" and "," so that a
				// plain (non-base64) data URL like "data:image/png,<data>" does
				// not leak its payload into the media type, and "data:" appearing
				// later in the string cannot match spuriously.
				const match = url.match(/^data:([^;,]+)/);
				return {
					dataUrl: url,
					base64Only: extractBase64FromDataUrl(url),
					mediaType: match?.[1] || fallbackMediaType,
				};
			}

			// Remote URL: there is no embedded base64 payload to extract.
			return {
				dataUrl: url,
				base64Only: "",
				mediaType: fallbackMediaType,
			};
		}

		// Handle file parts (AI SDK format)
		if (part.type === "file") {
			const { dataUrl, mediaType } = parseImageFile(part);
			return {
				dataUrl,
				base64Only: extractBase64FromDataUrl(dataUrl),
				mediaType,
			};
		}

		// Unknown part type — return an empty, skippable result.
		return {
			dataUrl: "",
			base64Only: "",
			mediaType: "image/png",
		};
	} catch {
		// Defensive: a malformed part must never crash the renderer.
		return {
			dataUrl: "",
			base64Only: "",
			mediaType: "image/png",
		};
	}
}

Usage Notes

Image generation models typically have higher token costs compared to text-only models due to the computational requirements of image synthesis.

Generated images are returned as base64-encoded data URLs, which can be large. Consider the payload size when integrating image generation into your applications.

How is this guide?

Last updated on