{ "version": "2025.12.08", "models": [ { "id": "relace-search", "name": "Relace: Relace Search", "description": "The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. \n\nIn contrast to RAG, relace-search performs agentic multi-step reasoning to produce highly precise results 4x faster than any frontier model. It's designed to serve as a subagent that passes its findings to an \"oracle\" coding agent, who orchestrates/performs the rest of the coding task.\n\nTo use relace-search you need to build an appropriate agent harness, and parse the response for relevant information to hand off to the oracle. Read more about it in the [Relace documentation](https://docs.relace.ai/docs/fast-agentic-search/agent).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "relace/relace-search-20251208", "created": 1765213560 } }, { "id": "glm-4.6v", "name": "Z.AI: GLM 4.6V", "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 24000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.8999999999999999, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.049999999999999996, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "z-ai/glm-4.6-20251208", "created": 1765207462 } }, { "id": "rnj-1-instruct", "name": "EssentialAI: Rnj 1 Instruct", "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). 
", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "essentialai/rnj-1-instruct", "created": 1765094847 } }, { "id": "bodybuilder", "name": "Body Builder", "description": "Transform your natural language requests into structured OpenRouter API request objects. Describe what you want to accomplish with AI models, and Body Builder will construct the appropriate API calls. Example: \"count to 10 using gemini and opus.\"\n\nThis is useful for creating multi-model requests, custom model routers, or programmatic generation of API calls from human descriptions.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "openrouter", "tags": [ "text->text", "Router" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openrouter/bodybuilder", "created": 1764903653 } }, { "id": "gpt-5.1-codex-max", "description": "GPT-5.1-Codex-Max is a frontier programming model built for the agent-driven era. Powered by an upgraded core reasoning architecture, it is specially trained for complex agentic tasks in software engineering, mathematics, and scientific research. It delivers faster performance, greater stability, and higher token efficiency across the entire development lifecycle, including code generation, refactoring, debugging, and engineering collaboration. With native support for multiple context windows and a built-in compaction mechanism, the model can coherently process millions of tokens within a single task.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "function_calling", "structured_outputs", "thinking" ], "original_types": "llm", "original_features": "function_calling,structured_outputs,thinking", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5.1-codex-max-20251204", "created": 1764878934 }, "name": "OpenAI: GPT-5.1-Codex-Max", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "nova-2-lite-v1:free", "name": "Amazon: Nova 2 Lite (free)", "description": "Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. 
\n\nNova 2 Lite demonstrates standout capabilities in processing documents, extracting information from videos, generating code, providing accurate grounded answers, and automating multi-step agentic workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 65535, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Nova" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "amazon/nova-2-lite-v1", "created": 1764696672 } }, { "id": "nova-2-lite-v1", "name": "Amazon: Nova 2 Lite", "description": "Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. \n\nNova 2 Lite demonstrates standout capabilities in processing documents, extracting information from videos, generating code, providing accurate grounded answers, and automating multi-step agentic workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 65535, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Nova" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "amazon/nova-2-lite-v1", "created": 1764696672 } }, { "id": "ministral-14b-2512", "name": "Mistral: Ministral 3 14B 2512", "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. 
A powerful and efficient language model with vision capabilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/ministral-14b-2512", "created": 1764681735 } }, { "id": "ministral-8b-2512", "name": "Mistral: Ministral 3 8B 2512", "description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/ministral-8b-2512", "created": 1764681654 } }, { "id": "ministral-3b-2512", "name": "Mistral: Ministral 3 3B 2512", "description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/ministral-3b-2512", "created": 1764681560 } }, { "id": "mistral-large-2512", "name": "Mistral: Mistral Large 3 2512", "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-large-2512", "created": 1764624472 } }, { "id": "trinity-mini:free", "name": "Arcee AI: Trinity Mini (free)", "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. 
Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arcee-ai/trinity-mini-20251201", "created": 1764601720 } }, { "id": "trinity-mini", "name": "Arcee AI: Trinity Mini", "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.045, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arcee-ai/trinity-mini-20251201", "created": 1764601720 } }, { "id": "deepseek-v3.2-speciale", "description": "DeepSeek-V3.2-Speciale is an enhanced long-thinking variant of DeepSeek-V3.2 that integrates the theorem-proving capabilities of DeepSeek-Math-V2. It excels in instruction following, mathematical reasoning, and logical verification, achieving performance comparable to Gemini-3.0-Pro on major reasoning benchmarks and winning gold medals at IMO 2025, CMO 2025, ICPC World Finals 2025, and IOI 2025. However, due to its long-thinking mechanism, the model may overthink simple questions, so task complexity should be carefully controlled during usage. The model only supports the thinking version.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.302, "currency": "USD" }, "output": { "per_million_tokens": 0.453, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0302, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-v3.2-speciale-20251201", "created": 1764594837 }, "name": "DeepSeek: DeepSeek V3.2 Speciale", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "deepseek-v3.2", "description": "DeepSeek-V3.2 is an efficient large language model equipped with DeepSeek Sparse Attention and reinforced reasoning performance, but its core strength lies in powerful agentic capabilities—enabled by large-scale task-synthesis that tightly integrates reasoning with real-world tool use, delivering robust, compliant, and generalizable agent behaviour. 
Users can toggle deeper reasoning through the reasoning_enabled switch.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.302, "currency": "USD" }, "output": { "per_million_tokens": 0.453, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0302, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-v3.2-20251201", "created": 1764594642 }, "name": "DeepSeek: DeepSeek V3.2", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "intellect-3", "name": "Prime Intellect: INTELLECT-3", "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 1.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "prime-intellect/intellect-3-20251126", "created": 1764212534 } }, { "id": "tng-r1t-chimera:free", "name": "TNG: R1T Chimera (free)", "description": "TNG-R1T-Chimera is an experimental LLM with a penchant for creative storytelling and character interaction. It is a derivative of the original TNG/DeepSeek-R1T-Chimera released in April 2025 and is available exclusively via Chutes and OpenRouter.\n\nCharacteristics and improvements include:\n\nWe think that it has a creative and pleasant personality.\nIt has a preliminary EQ-Bench3 value of about 1305.\nIt is quite a bit more intelligent than the original, albeit slightly slower.\nIt is much more think-token consistent, i.e. reasoning and answer blocks are properly delineated.\nTool calling is much improved.\n\nTNG Tech, the model authors, ask that users follow the careful guidelines that Microsoft has created for their \"MAI-DS-R1\" DeepSeek-based model. 
These guidelines are available on Hugging Face (https://huggingface.co/microsoft/MAI-DS-R1).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/tng-r1t-chimera", "created": 1764184161 } }, { "id": "tng-r1t-chimera", "name": "TNG: R1T Chimera", "description": "TNG-R1T-Chimera is an experimental LLM with a penchant for creative storytelling and character interaction. It is a derivative of the original TNG/DeepSeek-R1T-Chimera released in April 2025 and is available exclusively via Chutes and OpenRouter.\n\nCharacteristics and improvements include:\n\nWe think that it has a creative and pleasant personality.\nIt has a preliminary EQ-Bench3 value of about 1305.\nIt is quite a bit more intelligent than the original, albeit slightly slower.\nIt is much more think-token consistent, i.e. reasoning and answer blocks are properly delineated.\nTool calling is much improved.\n\nTNG Tech, the model authors, ask that users follow the careful guidelines that Microsoft has created for their \"MAI-DS-R1\" DeepSeek-based model. These guidelines are available on Hugging Face (https://huggingface.co/microsoft/MAI-DS-R1).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/tng-r1t-chimera", "created": 1764184161 } }, { "id": "claude-opus-4.5", "name": "Anthropic: Claude Opus 4.5", "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. 
It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 25, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4.5-opus-20251124", "created": 1764010580 } }, { "id": "olmo-3-32b-think:free", "name": "AllenAI: Olmo 3 32B Think (free)", "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 65536, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "allenai/olmo-3-32b-think-20251121", "created": 1763758276 } }, { "id": "olmo-3-7b-instruct", "name": "AllenAI: Olmo 3 7B Instruct", "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "allenai/olmo-3-7b-instruct-20251121", "created": 1763758273 } }, { "id": "olmo-3-7b-think", "name": "AllenAI: Olmo 3 7B Think", "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. 
Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "allenai/olmo-3-7b-think-20251121", "created": 1763758270 } }, { "id": "gemini-3-pro-image-preview", "description": "Gemini-3-Pro-Image-Preview (Nano Banana Pro) is a high-performance image generation and editing model built on Gemini 3 Pro. It delivers enhanced multimodal understanding and real-world semantic reasoning, enabling fast creation of well-structured visual content such as infographics, product sketches, and multi-subject scenes. It can also leverage real-time knowledge through Search grounding. The model excels in text rendering, consistent multi-image blending, and identity preservation, while offering fine-grained creative controls like localized edits, lighting and focus adjustments, camera transformations, and flexible aspect ratios. It’s ideal for rapid design, concept previews, product visualization, and everyday image generation workflows.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" }, "cache_read": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation", "original_architecture": "text+image->text+image", "canonical_slug": "google/gemini-3-pro-image-preview-20251120", "created": 1763653797 }, "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", "owned_by": "openrouter", "context_window": 65536, "max_output_tokens": 32768 }, { "id": "grok-4.1-fast", "name": "xAI: Grok 4.1 Fast", "description": "Grok 4.1 Fast is xAI's best agentic tool calling model that shines in real-world use cases like customer support and deep research. 2M context window.\n\nReasoning can be enabled/disabled using the `enabled` field of the `reasoning` parameter in the API. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "max_output_tokens": 30000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "x-ai/grok-4.1-fast", "created": 1763587502 } }, { "id": "gemini-3-pro-preview", "description": "Google's state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets, codebases, and documents using long context.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT", "WEB_SEARCH" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs", "web", "deepsearch", "long_context" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs,web,deepsearch,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-3-pro-preview-20251117", "created": 1763474668 }, "name": "Google: Gemini 3 Pro Preview", "owned_by": "openrouter", "context_window": 1048576, "max_output_tokens": 65536 }, { "id": "cogito-v2.1-671b", "name": "Deep Cogito: Cogito v2.1 671B", "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self-play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepcogito/cogito-v2.1-671b-20251118", "created": 1763071233 } }, { "id": "gpt-5.1", "description": "GPT-5.1 is OpenAI’s most advanced language model, designed for complex tasks that require step-by-step reasoning, precise instruction following, and high reliability. 
It improves reasoning, code generation, and prompt understanding—including test-time routing and intent cues like “think hard about this”—while reducing hallucination and sycophancy.", "capabilities": [ "REASONING", "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT", "FILE_INPUT", "CODE_EXECUTION", "COMPUTER_USE", "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "web", "tools", "deepsearch", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,web,tools,deepsearch,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5.1-20251113", "created": 1763060305 }, "name": "OpenAI: GPT-5.1", "owned_by": "openrouter" }, { "id": "gpt-5.1-chat", "name": "OpenAI: GPT-5.1 Chat", "description": "GPT-5.1 Chat (AKA Instant) is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5.1-chat-20251113", "created": 1763060302 } }, { "id": "gpt-5.1-codex", "description": "GPT-5.1-Codex is a version of GPT-5.1 optimized for agentic coding tasks in Codex or similar environments. It's available in the Responses API only and the underlying model snapshot will be regularly updated. 
", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5.1-codex-20251113", "created": 1763060298 }, "name": "OpenAI: GPT-5.1-Codex", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-5.1-codex-mini", "description": "GPT-5.1 Codex mini is a smaller, more cost-effective, less-capable version of GPT-5.1-Codex.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.025, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5.1-codex-mini-20251113", "created": 1763057820 }, "name": "OpenAI: GPT-5.1-Codex-Mini", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "kat-coder-pro:free", "name": "Kwaipilot: KAT-Coder-Pro V1 (free)", "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 32768, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "kwaipilot/kat-coder-pro-v1", "created": 1762745912 } }, { "id": "kimi-linear-48b-a3b-instruct", "name": "MoonshotAI: Kimi Linear 48B A3B Instruct", "description": "Kimi Linear is a hybrid linear attention architecture that outperforms traditional full attention methods across various contexts, including short, long, and reinforcement learning (RL) scaling regimes. At its core is Kimi Delta Attention (KDA)—a refined version of Gated DeltaNet that introduces a more efficient gating mechanism to optimize the use of finite-state RNN memory.\n\nKimi Linear achieves superior performance and hardware efficiency, especially for long-context tasks. 
It reduces the need for large KV caches by up to 75% and boosts decoding throughput by up to 6x for contexts as long as 1M tokens.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 1048576, "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 0.9, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-linear-48b-a3b-instruct-20251029", "created": 1762565833 } }, { "id": "kimi-k2-thinking", "description": "Kimi K2 Thinking is Moonshot AI's most advanced open-source inference model to date, extending the K2 series into intelligent agent and long-context inference domains. The model is built on the trillion-parameter mixture of experts (MoE) architecture introduced by Kimi K2, activating 32 billion parameters per forward pass and supporting a context window of 256,000 tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.137, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-k2-thinking-20251106", "created": 1762440622 }, "name": "MoonshotAI: Kimi K2 Thinking", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "nova-premier-v1", "name": "Amazon: Nova Premier 1.0", "description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 12.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.625, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Nova" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "amazon/nova-premier-v1", "created": 1761950332 } }, { "id": "sonar-pro-search", "name": "Perplexity: Sonar Pro Search", "description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based on tokens plus $18 per thousand requests. This model powers the Pro Search mode on the Perplexity platform.\n\nSonar Pro Search adds autonomous, multi-step reasoning to Sonar Pro. 
Instead of a single query-plus-synthesis pass, it plans and executes entire research workflows using tools.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "web_search_options" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "perplexity/sonar-pro-search", "created": 1761854366 } }, { "id": "voxtral-small-24b-2507", "name": "Mistral: Voxtral Small 24B 2507", "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "AUDIO_RECOGNITION" ], "input_modalities": [ "TEXT", "AUDIO" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/voxtral-small-24b-2507", "created": 1761835144 } }, { "id": "gpt-oss-safeguard-20b", "name": "OpenAI: gpt-oss-safeguard-20b", "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.075, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.037, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-safeguard-20b", "created": 1761752836 } }, { "id": "nemotron-nano-12b-v2-vl:free", "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. 
It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 128000, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "nvidia/nemotron-nano-12b-v2-vl", "created": 1761675565 } }, { "id": "nemotron-nano-12b-v2-vl", "name": "NVIDIA: Nemotron Nano 12B 2 VL", "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "nvidia/nemotron-nano-12b-v2-vl", "created": 1761675565 } }, { "id": "minimax-m2", "description": "MiniMax-M2 redefines efficiency for intelligent agents. It is a compact, fast, and cost-effective MoE model with a total of 230 billion parameters and 10 billion active parameters, designed for top performance in coding and intelligent agent tasks while maintaining strong general intelligence. 
With only 10 billion active parameters, MiniMax-M2 delivers the complex end-to-end tool usage performance expected from today's leading models, but in a more streamlined form factor, making deployment and scaling easier than ever before.", "input_modalities": [ "TEXT" ], "context_window": 204800, "max_output_tokens": 192000, "pricing": { "input": { "per_million_tokens": 0.288, "currency": "USD" }, "output": { "per_million_tokens": 1.152, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "minimax/minimax-m2", "created": 1761252093 }, "name": "MiniMax: MiniMax M2", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "output_modalities": [ "TEXT" ] }, { "id": "lfm2-8b-a1b", "name": "LiquidAI/LFM2-8B-A1B", "description": "Model created via inbox interface", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "liquid/lfm2-8b-a1b", "created": 1760970984 } }, { "id": "lfm-2.2-6b", "name": "LiquidAI/LFM2-2.6B", "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "liquid/lfm-2.2-6b", "created": 1760970889 } }, { "id": "granite-4.0-h-micro", "name": "IBM: Granite 4.0 Micro", "description": "Granite-4.0-H-Micro is a 3B-parameter model from the Granite 4 family of models. These models are the latest in a series of models released by IBM. They are fine-tuned for long-context tool calling.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "pricing": { "input": { "per_million_tokens": 0.017, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "ibm-granite/granite-4.0-h-micro", "created": 1760927695 } }, { "id": "cogito-v2-preview-llama-405b", "name": "Deep Cogito: Cogito V2 Preview Llama 405B", "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. It represents a significant step toward frontier intelligence with dense architecture delivering performance competitive with leading closed models. 
This advanced reasoning system combines policy improvement with massive scale for exceptional capabilities.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 3.5, "currency": "USD" }, "output": { "per_million_tokens": 3.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepcogito/cogito-v2-preview-llama-405b", "created": 1760709933 } }, { "id": "gpt-5-image-mini", "name": "OpenAI: GPT-5 Image Mini", "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION", "IMAGE_GENERATION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "VISION", "TEXT" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text+image", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text+image", "canonical_slug": "openai/gpt-5-image-mini", "created": 1760624583 } }, { "id": "claude-haiku-4.5", "name": "Anthropic: Claude Haiku 4.5", "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line, enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. 
Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4.5-haiku-20251001", "created": 1760547638 } }, { "id": "qwen3-vl-8b-thinking", "name": "Qwen: Qwen3 VL 8B Thinking", "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and long-context processing (native 256K, expandable to 1M tokens) for tasks such as scientific visual analysis, causal inference, and mathematical reasoning over image or video inputs.\n\nCompared to the Instruct edition, the Thinking version introduces deeper visual-language fusion and deliberate reasoning pathways that improve performance on long-chain logic tasks, STEM problem-solving, and multi-step video understanding. It achieves stronger temporal grounding via Interleaved-MRoPE and timestamp-aware embeddings, while maintaining robust OCR, multilingual comprehension, and text generation on par with large text-only LLMs.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 2.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-8b-thinking", "created": 1760463746 } }, { "id": "qwen3-vl-8b-instruct", "name": "Qwen: Qwen3 VL 8B Instruct", "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. 
It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.064, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-8b-instruct", "created": 1760463308 } }, { "id": "gpt-5-image", "name": "OpenAI: GPT-5 Image", "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's most advanced language model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION", "IMAGE_GENERATION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "VISION", "TEXT" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text+image", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text+image", "canonical_slug": "openai/gpt-5-image", "created": 1760447986 } }, { "id": "o3-deep-research", "name": "OpenAI: o3 Deep Research", "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" }, "cache_read": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o3-deep-research-2025-06-26", "created": 1760129661 } }, { "id": "o4-mini-deep-research", "name": "OpenAI: o4 Mini Deep Research", "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o4-mini-deep-research-2025-06-26", "created": 1760129642 } }, { "id": "llama-3.3-nemotron-super-49b-v1.5", "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). 
Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "created": 1760101395 } }, { "id": "ernie-4.5-21b-a3b-thinking", "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.056, "currency": "USD" }, "output": { "per_million_tokens": 0.224, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "baidu/ernie-4.5-21b-a3b-thinking", "created": 1760048887 } }, { "id": "gemini-2.5-flash-image", "description": "Gemini 2.5 Flash Image (Nano-Banana) is a state-of-the-art image generation and editing model that enables seamless blending of multiple images into a single composition while maintaining character consistency for rich visual storytelling.
It supports precise, targeted image transformations through natural language instructions and leverages built-in world knowledge for both image generation and editing, making it well suited for creative design, content production, advertising, and visual expression workflows.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "VISION" ], "context_window": 32800, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation", "original_architecture": "text+image->text+image", "canonical_slug": "google/gemini-2.5-flash-image", "created": 1759870431 }, "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", "owned_by": "openrouter" }, { "id": "qwen3-vl-30b-a3b-thinking", "description": "The Thinking version of the Qwen3-VL series’ second-largest MoE model offers fast response speed, stronger multimodal understanding and reasoning, visual agent capabilities, and ultra-long context support for long videos and long documents; it features comprehensive upgrades in image/video understanding, spatial perception, and universal recognition abilities, making it capable of handling complex real-world tasks.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 128000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.1028, "currency": "USD" }, "output": { "per_million_tokens": 1.028, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-30b-a3b-thinking", "created": 1759794479 }, "name": "Qwen: Qwen3 VL 30B A3B Thinking", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-vl-30b-a3b-instruct", "description": "The Instruct version of the Qwen3-VL series’ second-largest MoE model offers fast response speed and supports ultra-long contexts such as long videos and long documents; it features comprehensive upgrades in image/video understanding, spatial perception, and universal recognition abilities; it also provides visual 2D/3D localization capabilities, making it capable of handling complex real-world tasks.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 128000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.1028, "currency": "USD" }, "output": { "per_million_tokens": 0.4112, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-30b-a3b-instruct", "created": 1759794476 }, "name": "Qwen: Qwen3 VL 30B A3B Instruct", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-5-pro", "description": "GPT-5 pro uses more compute to think harder and
provide consistently better answers.\n\nGPT-5 pro is available only in the Responses API, to enable support for multi-turn model interactions before responding to API requests, and for other advanced API features in the future. Since GPT-5 pro is designed to tackle tough problems, some requests may take several minutes to finish. To avoid timeouts, try using background mode. As our most advanced reasoning model, GPT-5 pro defaults to (and only supports) reasoning.effort: high. GPT-5 pro does not support code interpreter.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 120, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-pro-2025-10-06", "created": 1759776663 }, "name": "OpenAI: GPT-5 Pro", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "glm-4.6", "description": "GLM-4.6 is Zhipu’s latest flagship model (355B total parameters, 32B active), comprehensively surpassing GLM-4.5. Its coding capability is on par with Claude Sonnet 4, making it a top-tier domestic coding model; the context window has been expanded from 128K to 200K tokens, better suiting long code and agent tasks; reasoning has been significantly enhanced, with support for tool invocation during inference; and tool calling, search agents, writing style, role play, and multilingual translation have all been improved.
The model is served as glm-4.6 by three vendors, with calls routed preferentially to the Sophnet platform.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 204800, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4.6", "created": 1759235576 }, "name": "Z.AI: GLM 4.6", "owned_by": "openrouter" }, { "id": "glm-4.6:exacto", "name": "Z.AI: GLM 4.6 (exacto)", "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code, Cline, Roo Code, and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool use and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 202752, "pricing": { "input": { "per_million_tokens": 0.43, "currency": "USD" }, "output": { "per_million_tokens": 1.75, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4.6", "created": 1759235576 } }, { "id": "claude-sonnet-4.5", "name": "Anthropic: Claude Sonnet 4.5", "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows.
Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4.5-sonnet-20250929", "created": 1759161676 } }, { "id": "deepseek-v3.2-exp", "description": "The model DeepSeek-V3.2-Exp is officially named deepseek-chat on the website. It is an experimental version. As an intermediate step towards the next-generation architecture, V3.2-Exp introduces DeepSeek Sparse Attention (a sparse attention mechanism) based on V3.1-Terminus, exploring and validating optimizations for training and inference efficiency in long-context scenarios.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 163000, "max_output_tokens": 163000, "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.411, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-v3.2-exp", "created": 1759150481 }, "name": "DeepSeek: DeepSeek V3.2 Exp", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "cydonia-24b-v4.1", "name": "TheDrummer: Cydonia 24B V4.1", "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "thedrummer/cydonia-24b-v4.1", "created": 1758931878 } }, { "id": "relace-apply-3", "name": "Relace: Relace Apply 3", "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Relace.
Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.85, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "relace/relace-apply-3", "created": 1758891572 } }, { "id": "gemini-2.5-flash-preview-09-2025", "description": "This latest 2.5 Flash model comes with improvements in two key areas we heard consistent feedback on:\n\nBetter agentic tool use: We've improved how the model uses tools, leading to better performance in more complex, agentic and multi-step applications. This model shows noticeable improvements on key agentic benchmarks, including a 5% gain on SWE-Bench Verified, compared to our last release (48.9% → 54%). More efficient: With thinking on, the model is now significantly more cost-efficient—achieving higher quality outputs while using fewer tokens, reducing latency and cost.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-flash-preview-09-2025", "created": 1758820178 }, "name": "Google: Gemini 2.5 Flash Preview 09-2025", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gemini-2.5-flash-lite-preview-09-2025", "description": "The latest preview version of gemini-2.5-flash-lite.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.025, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-flash-lite-preview-09-2025", "created": 1758819686 }, "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-vl-235b-a22b-thinking", "description": "The Qwen3 series open-source models include hybrid models, thinking models, and non-thinking models, with both reasoning capabilities and general abilities reaching industry SOTA levels at the same scale.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 131000, "max_output_tokens": 33000, "pricing": { "input": {
"per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 2.74, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-235b-a22b-thinking", "created": 1758668690 }, "name": "Qwen: Qwen3 VL 235B A22B Thinking", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-vl-235b-a22b-instruct", "description": "The Qwen3 series open-source models include hybrid models, thinking models, and non-thinking models, with both reasoning capabilities and general abilities reaching industry SOTA levels at the same scale.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 131000, "max_output_tokens": 33000, "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 1.096, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen3-vl-235b-a22b-instruct", "created": 1758668687 }, "name": "Qwen: Qwen3 VL 235B A22B Instruct", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-max", "description": "The Tongyi Qianwen 3 series Max model has undergone special upgrades in intelligent agent programming and tool invocation compared to the preview version. 
This official release reaches SOTA level in the field and is adapted to more complex agentic scenarios.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 262144, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.822, "currency": "USD" }, "output": { "per_million_tokens": 3.288, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.822, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-max", "created": 1758662808 }, "name": "Qwen: Qwen3 Max", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-coder-plus", "description": "This Qwen3-based code generation model has powerful coding-agent capabilities: it excels at tool invocation and environment interaction, and can program autonomously, combining outstanding coding ability with general capabilities. The model adopts tiered pricing.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.54, "currency": "USD" }, "output": { "per_million_tokens": 2.16, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.108, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-plus", "created": 1758662707 }, "name": "Qwen: Qwen3 Coder Plus", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-5-codex", "description": "GPT-5-Codex is a version of GPT-5 optimized for autonomous coding tasks in Codex or similar environments. It is only available in the Responses API, and the underlying model snapshots will be updated regularly.
See https://docs.aihubmix.com/en/api/Responses-API. You can also use it in the Codex CLI; see https://docs.aihubmix.com/en/api/Codex-CLI for using the Codex CLI through Aihubmix.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-codex", "created": 1758643403 }, "name": "OpenAI: GPT-5 Codex", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "deepseek-v3.1-terminus:exacto", "name": "DeepSeek: DeepSeek V3.1 Terminus (exacto)", "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.
", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 0.7899999999999999, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.16799999999999998, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-v3.1-terminus", "created": 1758548275 } }, { "id": "deepseek-v3.1-terminus", "description": "DeepSeek-V3.1 non-thinking mode has now been updated to the DeepSeek-V3.1-Terminus version.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 160000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.56, "currency": "USD" }, "output": { "per_million_tokens": 1.68, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.16799999999999998, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-v3.1-terminus", "created": 1758548275 }, "name": "DeepSeek: DeepSeek V3.1 Terminus", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "grok-4-fast", "name": "xAI: Grok 4 Fast", "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast).\n\nReasoning can be enabled/disabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "max_output_tokens": 30000, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.049999999999999996, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "x-ai/grok-4-fast", "created": 1758240090 } }, { "id": "tongyi-deepresearch-30b-a3b:free", "name": "Tongyi DeepResearch 30B A3B (free)", "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. 
This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "alibaba/tongyi-deepresearch-30b-a3b", "created": 1758210804 } }, { "id": "tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.09, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "alibaba/tongyi-deepresearch-30b-a3b", "created": 1758210804 } }, { "id": "qwen3-coder-flash", "description": "Qwen3 Coder Flash is Alibaba's fast and cost-efficient version of their proprietary Qwen3 Coder Plus.
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.544, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.136, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-flash", "created": 1758115536 }, "name": "Qwen: Qwen3 Coder Flash", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "internvl3-78b", "name": "OpenGVLab: InternVL3 78B", "description": "InternVL3 is a series of advanced multimodal large language models (MLLMs). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "opengvlab/internvl3-78b", "created": 1757962555 } }, { "id": "qwen3-next-80b-a3b-thinking", "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that excels by outputting structured 'thinking' traces (Chain-of-Thought) by default.\n\nDesigned for hard, multi-step problems, it is ideal for tasks like math proofs, code synthesis, logic puzzles, and agentic planning. Compared to other Qwen3 variants, it offers greater stability during long reasoning chains and is tuned to follow complex instructions without getting repetitive or off-task.\n\nThis model is perfectly suited for agent frameworks, tool use (function calling), and benchmarks where a step-by-step breakdown is required.
It leverages throughput-oriented techniques for fast generation of detailed, procedural outputs.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0.138, "currency": "USD" }, "output": { "per_million_tokens": 1.38, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-next-80b-a3b-thinking-2509", "created": 1757612284 }, "name": "Qwen: Qwen3 Next 80B A3B Thinking", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-next-80b-a3b-instruct", "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned model in the Qwen3-Next series, optimized for delivering fast, stable, and direct final answers without showing its reasoning steps (\"thinking traces\").\n\nUnlike chain-of-thought models, it focuses on generating consistent, instruction-following outputs, making it ideal for production environments. It excels at complex tasks like reasoning and coding while maintaining high throughput and stability, especially with ultra-long inputs and multi-turn dialogues.\n\nEngineered for efficiency, its performance rivals larger Qwen3 systems, making it perfectly suited for RAG, tool use, and agentic workflows where deterministic results are critical.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0.138, "currency": "USD" }, "output": { "per_million_tokens": 0.552, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-next-80b-a3b-instruct-2509", "created": 1757612213 }, "name": "Qwen: Qwen3 Next 80B A3B Instruct", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "longcat-flash-chat:free", "name": "Meituan: LongCat Flash Chat (free)", "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. 
It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meituan/longcat-flash-chat", "created": 1757427658 } }, { "id": "longcat-flash-chat", "description": "Meituan has officially released and open-sourced LongCat-Flash-Chat, which utilizes an innovative Mixture of Experts (MoE) and \"zero-computation expert\" mechanism to achieve a total of 560B parameters, while only activating around 27B parameters per token as needed. At the same time, end-to-end optimization for agents (including a self-built evaluation set and multi-agent trajectory data) significantly enhances its performance in tool usage and complex task orchestration.", "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.7, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meituan/longcat-flash-chat", "created": 1757427658 }, "name": "Meituan: LongCat Flash Chat", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072 }, { "id": "qwen-plus-2025-07-28", "description": "The balanced-capability model of the Tongyi Qianwen (Qwen) series offers inference performance and speed between Qwen-Max and Qwen-Turbo, making it suitable for moderately complex tasks.
This model adopts tiered pricing.", "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.275, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-plus-2025-07-28", "created": 1757347599 }, "name": "Qwen: Qwen Plus 0728", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 32768 }, { "id": "qwen-plus-2025-07-28:thinking", "name": "Qwen: Qwen Plus 0728 (thinking)", "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-plus-2025-07-28", "created": 1757347599 } }, { "id": "nemotron-nano-9b-v2:free", "name": "NVIDIA: Nemotron Nano 9B V2 (free)", "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/nemotron-nano-9b-v2", "created": 1757106807 } }, { "id": "nemotron-nano-9b-v2", "name": "NVIDIA: Nemotron Nano 9B V2", "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt.
If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/nemotron-nano-9b-v2", "created": 1757106807 } }, { "id": "kimi-k2-0905", "description": "Kimi-K2-0905 is a large-scale Mixture of Experts (MoE) language model developed by Moonshot AI, with a total of 1 trillion parameters and 32 billion active parameters per forward pass. It supports long-context inference of up to 256k tokens, an expansion from the previous 128k.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-k2-0905", "created": 1757021147 }, "name": "MoonshotAI: Kimi K2 0905", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "kimi-k2-0905:exacto", "name": "MoonshotAI: Kimi K2 0905 (exacto)", "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-k2-0905", "created": 1757021147 } }, { "id": "cogito-v2-preview-llama-70b", "name": "Deep Cogito: Cogito V2 Preview Llama 70B", "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. 
Built with iterative policy improvement, it delivers strong performance across reasoning tasks while maintaining efficiency through shorter reasoning chains and improved intuition.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.88, "currency": "USD" }, "output": { "per_million_tokens": 0.88, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepcogito/cogito-v2-preview-llama-70b", "created": 1756831784 } }, { "id": "cogito-v2-preview-llama-109b-moe", "name": "Cogito V2 Preview Llama 109B", "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E. Cogito v2 can answer directly or engage an extended “thinking” phase, with alignment guided by Iterated Distillation & Amplification (IDA). It targets coding, STEM, instruction following, and general helpfulness, with stronger multilingual, tool-calling, and reasoning performance than size-equivalent baselines. The model supports long-context use (up to 10M tokens) and standard Transformers workflows. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32767, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.59, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Llama4" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "deepcogito/cogito-v2-preview-llama-109b-moe", "created": 1756831568 } }, { "id": "step3", "name": "StepFun: Step3", "description": "Step3 is a cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active. It is designed end-to-end to minimize decoding costs while delivering top-tier performance in vision–language reasoning. 
Through the co-design of Multi-Matrix Factorization Attention (MFA) and Attention-FFN Disaggregation (AFD), Step3 maintains exceptional efficiency across both flagship and low-end accelerators.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.57, "currency": "USD" }, "output": { "per_million_tokens": 1.42, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "stepfun-ai/step3", "created": 1756415375 } }, { "id": "qwen3-30b-a3b-thinking-2507", "description": "Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, and academic benchmarks that typically require human expertise.\nMarkedly better general capabilities, such as instruction following, tool usage, text generation, and alignment with human preferences.\nEnhanced 256K long-context understanding capabilities.", "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-30b-a3b-thinking-2507", "created": 1756399192 }, "name": "Qwen: Qwen3 30B A3B Thinking 2507", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "grok-code-fast-1", "name": "xAI: Grok Code Fast 1", "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 10000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "x-ai/grok-code-fast-1", "created": 1756238927 } }, { "id": "hermes-4-70b", "name": "Nous: Hermes 4 70B", "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean.
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.38, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-4-70b", "created": 1756236182 } }, { "id": "hermes-4-405b", "name": "Nous: Hermes 4 405B", "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. 
Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-4-405b", "created": 1756235463 } }, { "id": "gemini-2.5-flash-image-preview", "description": "Aihubmix supports the gemini-2.5-flash-image-preview model; you can pass the extra parameter modalities=[\"text\", \"image\"] through the OpenAI-compatible chat interface; see https://docs.aihubmix.com/en/api/Gemini-Guides#gemini-2-5-flash%3A-quick-task-support", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "VISION" ], "context_window": 32800, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation", "original_architecture": "text+image->text+image", "canonical_slug": "google/gemini-2.5-flash-image-preview", "created": 1756218977 }, "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", "owned_by": "openrouter" }, { "id": "deepseek-chat-v3.1", "name": "DeepSeek: DeepSeek V3.1", "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows.
\n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-chat-v3.1", "created": 1755779628 } }, { "id": "gpt-4o-audio-preview", "description": "OpenAI's voice input and output model, priced consistently with the official rates. Only text-token prices are displayed here for now; audio pricing is listed on the official OpenAI website. Backend billing matches the official API.", "input_modalities": [ "TEXT", "AUDIO" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4o-audio-preview", "created": 1755233061 }, "name": "OpenAI: GPT-4o Audio", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "AUDIO_RECOGNITION" ], "output_modalities": [ "TEXT" ] }, { "id": "mistral-medium-3.1", "name": "Mistral: Mistral Medium 3.1", "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows.
Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-medium-3.1", "created": 1755095639 } }, { "id": "ernie-4.5-21b-a3b", "name": "Baidu: ERNIE 4.5 21B A3B", "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 120000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.056, "currency": "USD" }, "output": { "per_million_tokens": 0.224, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "baidu/ernie-4.5-21b-a3b", "created": 1755034167 } }, { "id": "ernie-4.5-vl-28b-a3b", "name": "Baidu: ERNIE 4.5 VL 28B A3B", "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 30000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.112, "currency": "USD" }, "output": { "per_million_tokens": 0.448, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "baidu/ernie-4.5-vl-28b-a3b", "created": 1755032836 } }, { "id": "glm-4.5v", "description": "GLM-4.5V is a vision-language foundational model designed for multimodal agent applications. 
Based on a mixture-of-experts (MoE) architecture, it has 106 billion parameters and 12 billion active parameters. It delivers outstanding performance in video understanding, image question answering, OCR, and document parsing, and achieves significant improvements in front-end web coding, basic reasoning, and spatial reasoning.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 64000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.822, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "z-ai/glm-4.5v", "created": 1754922288 }, "name": "Z.AI: GLM 4.5V", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ] }, { "id": "jamba-mini-1.7", "name": "AI21: Jamba Mini 1.7", "description": "Jamba Mini 1.7 is a compact and efficient member of the Jamba open model family, incorporating key improvements in grounding and instruction-following while maintaining the benefits of the SSM-Transformer hybrid architecture and 256K context window. Despite its compact size, it delivers accurate, contextually grounded responses and improved steerability.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "ai21/jamba-mini-1.7", "created": 1754670601 } }, { "id": "jamba-large-1.7", "name": "AI21: Jamba Large 1.7", "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. 
Built on a hybrid SSM-Transformer architecture with a 256K context window, it delivers more accurate, contextually grounded responses and better steerability than previous versions.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "ai21/jamba-large-1.7", "created": 1754669020 } }, { "id": "gpt-5-chat", "name": "OpenAI: GPT-5 Chat", "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-chat-2025-08-07", "created": 1754587837 } }, { "id": "gpt-5", "description": "GPT-5 is OpenAI’s most advanced general-purpose model, delivering major improvements in reasoning, code quality, and overall user experience. It is optimized for complex tasks that require step-by-step reasoning, precise instruction following, and high accuracy in high-stakes scenarios. The model supports test-time routing and advanced prompt understanding, including user-specified intent such as “think hard about this,” while significantly reducing hallucination and sycophancy and improving performance in coding, writing, and health-related tasks.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-2025-08-07", "created": 1754587413 }, "name": "OpenAI: GPT-5", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-5-mini", "description": "GPT-5 mini is a faster, more cost-efficient version of GPT-5. 
It's great for well-defined tasks and precise prompts.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.025, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-mini-2025-08-07", "created": 1754587407 }, "name": "OpenAI: GPT-5 Mini", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-5-nano", "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, designed specifically for developer tools and environments that demand rapid interactions and ultra-low latency. While it offers a more lightweight solution with limited reasoning depth compared to its larger counterparts, GPT-5-Nano excels in core capabilities such as instruction-following and maintaining critical safety features. As the successor to GPT-4.1-nano, it provides an optimal choice for cost-sensitive or real-time applications, where efficiency and speed are paramount. Particularly well-suited for summarization and classification tasks, GPT-5-Nano is a powerful tool for developers needing a swift, reliable AI model for streamlined processes.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.005, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-5-nano-2025-08-07", "created": 1754587402 }, "name": "OpenAI: GPT-5 Nano", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-oss-120b:free", "name": "OpenAI: gpt-oss-120b (free)", "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. 
The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-120b", "created": 1754414231 } }, { "id": "gpt-oss-120b", "description": "gpt-oss-120b is a 117B-parameter open-weight Mixture-of-Experts (MoE) language model from OpenAI, designed for high-reasoning, agentic, and general-purpose production use cases. Activating just 5.1B parameters per pass, it is optimized to run on a single H100 GPU with native MXFP4 quantization. The model features configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.9, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-120b", "created": 1754414231 }, "name": "OpenAI: gpt-oss-120b", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-oss-120b:exacto", "name": "OpenAI: gpt-oss-120b (exacto)", "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.039, "currency": "USD" }, "output": { "per_million_tokens": 0.19, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-120b", "created": 1754414231 } }, { "id": "gpt-oss-20b:free", "name": "OpenAI: gpt-oss-20b (free)", "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. 
The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 128000, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-20b", "created": 1754414229 } }, { "id": "gpt-oss-20b", "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.55, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-oss-20b", "created": 1754414229 }, "name": "OpenAI: gpt-oss-20b", "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "claude-opus-4.1", "name": "Anthropic: Claude Opus 4.1", "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4.1-opus-20250805", "created": 1754411591 } }, { "id": "codestral-2508", "name": "Mistral: Codestral 2508", "description": "Mistral's cutting-edge language model for coding, released at the end of July 2025. 
Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.9, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/codestral-2508", "created": 1754079630 } }, { "id": "qwen3-coder-30b-a3b-instruct", "description": "This code generation model based on Qwen3 has powerful coding-agent capabilities, achieving state-of-the-art performance among open-source models. The model uses tiered pricing.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 2000000, "max_output_tokens": 262000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-30b-a3b-instruct", "created": 1753972379 }, "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-30b-a3b-instruct-2507", "description": "Significantly improved performance on reasoning tasks, including logical reasoning, mathematics, science, coding, and academic benchmarks that typically require human expertise.\nMarkedly better general capabilities, such as instruction following, tool usage, text generation, and alignment with human preferences.\nEnhanced 256K long-context understanding capabilities.", "pricing": { "input": { "per_million_tokens": 0.1028, "currency": "USD" }, "output": { "per_million_tokens": 0.4112, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-30b-a3b-instruct-2507", "created": 1753806965 }, "name": "Qwen: Qwen3 30B A3B Instruct 2507", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144 }, { "id": "glm-4.5", "description": "GLM-4.5", "input_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 98304, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4.5", "created": 1753471347 }, "name": "Z.AI: GLM 4.5", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "output_modalities": [ "TEXT" ] }, { "id": 
"glm-4.5-air:free", "name": "Z.AI: GLM 4.5 Air (free)", "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4.5-air", "created": 1753471258 } }, { "id": "glm-4.5-air", "input_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 98304, "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.84, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4.5-air", "created": 1753471258 }, "name": "Z.AI: GLM 4.5 Air", "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "output_modalities": [ "TEXT" ] }, { "id": "qwen3-235b-a22b-thinking-2507", "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 2.8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-235b-a22b-thinking-2507", "created": 1753449557 }, "name": "Qwen: Qwen3 235B A22B Thinking 2507", "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. 
It enforces a special reasoning mode (<think> blocks) and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144 }, { "id": "glm-4-32b", "name": "Z.AI: GLM 4 32B ", "description": "GLM 4 32B is a cost-effective foundation language model.\n\nIt can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks.\n\nIt is made by the same lab behind the THUDM models.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "z-ai/glm-4-32b-0414", "created": 1753376617 } }, { "id": "qwen3-coder:free", "name": "Qwen: Qwen3 Coder 480B A35B (free)", "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262000, "max_output_tokens": 262000, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-480b-a35b-07-25", "created": 1753230546 } }, { "id": "qwen3-coder", "name": "Qwen: Qwen3 Coder 480B A35B", "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. 
Once a request is greater than 128k input tokens, the higher pricing is used.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 0.22, "currency": "USD" }, "output": { "per_million_tokens": 0.95, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-480b-a35b-07-25", "created": 1753230546 } }, { "id": "qwen3-coder:exacto", "name": "Qwen: Qwen3 Coder 480B A35B (exacto)", "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 0.38, "currency": "USD" }, "output": { "per_million_tokens": 1.53, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-coder-480b-a35b-07-25", "created": 1753230546 } }, { "id": "ui-tars-1.5-7b", "name": "ByteDance: UI-TARS 7B ", "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 2048, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "bytedance/ui-tars-1.5-7b", "created": 1753205056 } }, { "id": "gemini-2.5-flash-lite", "description": "Gemini 2.5 Flash-Lite is a balanced model from Google, optimized for applications that require low-latency performance. 
It retains the practical capabilities of the Gemini 2.5 family, including configurable reasoning based on budget, integration with tools such as grounding via Google Search and code execution, multimodal input support, and an ultra-long context window of up to 1 million tokens, delivering a strong balance between efficiency, functionality, and cost.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.025, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-flash-lite", "created": 1753200276 }, "name": "Google: Gemini 2.5 Flash Lite", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" (<think> blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.071, "currency": "USD" }, "output": { "per_million_tokens": 0.463, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-235b-a22b-07-25", "created": 1753119555 } }, { "id": "router", "name": "Switchpoint Router", "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. 
It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.85, "currency": "USD" }, "output": { "per_million_tokens": 3.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "switchpoint/router", "created": 1752272899 } }, { "id": "kimi-k2:free", "name": "MoonshotAI: Kimi K2 0711 (free)", "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-k2", "created": 1752263252 } }, { "id": "kimi-k2", "name": "MoonshotAI: Kimi K2 0711", "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.456, "currency": "USD" }, "output": { "per_million_tokens": 1.84, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-k2", "created": 1752263252 } }, { "id": "glm-4.1v-9b-thinking", "name": "THUDM: GLM 4.1V 9B Thinking", "description": "GLM-4.1V-9B-Thinking is a 9B parameter vision-language model developed by THUDM, based on the GLM-4-9B foundation. It introduces a reasoning-centric \"thinking paradigm\" enhanced with reinforcement learning to improve multimodal reasoning, long-context understanding (up to 64K tokens), and complex problem solving. It achieves state-of-the-art performance among models in its class, outperforming even larger models like Qwen-2.5-VL-72B on a majority of benchmark tasks. 
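\n\nA minimal sketch of an image-input request (assuming the OpenAI-compatible multimodal message format; the URL and prompt are illustrative):\n\n```python\nimport requests\n\nresp = requests.post(\n    \"https://openrouter.ai/api/v1/chat/completions\",\n    headers={\"Authorization\": \"Bearer $OPENROUTER_API_KEY\"},\n    json={\n        \"model\": \"thudm/glm-4.1v-9b-thinking\",\n        \"messages\": [{\n            \"role\": \"user\",\n            \"content\": [\n                # OpenAI-style image_url content part alongside a text part\n                {\"type\": \"image_url\", \"image_url\": {\"url\": \"https://example.com/chart.png\"}},\n                {\"type\": \"text\", \"text\": \"Summarize this chart.\"},\n            ],\n        }],\n    },\n)\nprint(resp.json()[\"choices\"][0][\"message\"][\"content\"])\n```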
", "owned_by": "openrouter", "capabilities": [ "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.028, "currency": "USD" }, "output": { "per_million_tokens": 0.1104, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "thudm/glm-4.1v-9b-thinking", "created": 1752244385 } }, { "id": "devstral-medium", "name": "Mistral: Devstral Medium", "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/devstral-medium-2507", "created": 1752161321 } }, { "id": "devstral-small", "name": "Mistral: Devstral Small 1.1", "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. 
The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.28, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/devstral-small-2507", "created": 1752160751 } }, { "id": "dolphin-mistral-24b-venice-edition:free", "name": "Venice: Uncensored (free)", "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "venice/uncensored", "created": 1752094966 } }, { "id": "grok-4", "description": "Grok 4, xAI's latest and greatest flagship model, offers unparalleled performance in natural language, math, and reasoning – the perfect jack of all trades.\nThe model currently points to version grok-4-0709.", "context_window": 256000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.825, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "x-ai/grok-4-07-09", "created": 1752087689 }, "name": "xAI: Grok 4", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ] }, { "id": "gemma-3n-e2b-it:free", "name": "Google: Gemma 3n 2B (free)", "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. 
This variant is trained on a diverse corpus including code, math, web, and multimodal data.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 2048, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-3n-e2b-it", "created": 1752074904 } }, { "id": "hunyuan-a13b-instruct", "name": "Tencent: Hunyuan A13B Instruct", "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.57, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tencent/hunyuan-a13b-instruct", "created": 1751987664 } }, { "id": "deepseek-r1t2-chimera:free", "name": "TNG: DeepSeek R1T2 Chimera (free)", "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20% faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. The checkpoint supports contexts up to 60k tokens in standard use (tested to ~130k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/deepseek-r1t2-chimera", "created": 1751986985 } }, { "id": "deepseek-r1t2-chimera", "name": "TNG: DeepSeek R1T2 Chimera", "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20% faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. 
The checkpoint supports contexts up to 60k tokens in standard use (tested to ~130k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/deepseek-r1t2-chimera", "created": 1751986985 } }, { "id": "morph-v3-large", "name": "Morph: Morph V3 Large", "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.9, "currency": "USD" }, "output": { "per_million_tokens": 1.9, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "morph/morph-v3-large", "created": 1751910858 } }, { "id": "morph-v3-fast", "name": "Morph: Morph V3 Fast", "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n<instruction>{instruction}</instruction>\n<code>{initial_code}</code>\n<update>{edit_snippet}</update>\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 81920, "max_output_tokens": 38000, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "morph/morph-v3-fast", "created": 1751910002 } }, { "id": "ernie-4.5-vl-424b-a47b", "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. 
Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", "owned_by": "openrouter", "capabilities": [ "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 123000, "max_output_tokens": 16000, "pricing": { "input": { "per_million_tokens": 0.336, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "baidu/ernie-4.5-vl-424b-a47b", "created": 1751300903 } }, { "id": "ernie-4.5-300b-a47b", "name": "Baidu: ERNIE 4.5 300B A47B ", "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 123000, "max_output_tokens": 12000, "pricing": { "input": { "per_million_tokens": 0.224, "currency": "USD" }, "output": { "per_million_tokens": 0.88, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "baidu/ernie-4.5-300b-a47b", "created": 1751300139 } }, { "id": "anubis-70b-v1.1", "name": "TheDrummer: Anubis 70B V1.1", "description": "TheDrummer's Anubis v1.1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories. It excels at gritty, visceral prose, unique character adherence, and coherent narratives, while maintaining the instruction following Llama 3.3 70B is known for.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.75, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "thedrummer/anubis-70b-v1.1", "created": 1751208347 } }, { "id": "mercury", "name": "Inception: Mercury", "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed-optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post](https://www.inceptionlabs.ai/blog/introducing-mercury). 
", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "inception/mercury", "created": 1750973026 } }, { "id": "mistral-small-3.2-24b-instruct", "name": "Mistral: Mistral Small 3.2 24B", "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-small-3.2-24b-instruct-2506", "created": 1750443016 } }, { "id": "minimax-m1", "name": "MiniMax: MiniMax M1", "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 40000, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "minimax/minimax-m1", "created": 1750200414 } }, { "id": "gemini-2.5-flash", "description": "Gemini 2.5 Flash is Google’s best model in terms of both performance and cost efficiency, offering a comprehensive set of capabilities. 
It is the first Flash model to support visible reasoning, allowing insight into the thought process behind its responses. With its strong price–performance ratio, the model is well suited for large-scale processing, low-latency, high-throughput tasks that require reasoning, as well as agent-based application scenarios.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-flash", "created": 1750172488 }, "name": "Google: Gemini 2.5 Flash", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gemini-2.5-pro", "description": "Gemini 2.5 Pro is an advanced reasoning model developed by Google, optimized for solving highly complex problems across multiple domains. It can deeply understand large-scale information from diverse sources, including text, audio, images, video, and even entire codebases. The model demonstrates strong reasoning capabilities in coding, mathematics, and STEM-related tasks, and supports long-context analysis for large datasets, codebases, and technical documentation.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "WEB_SEARCH", "REASONING" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context", "web", "thinking", "deepsearch" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context,web,thinking,deepsearch", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-pro", "created": 1750169544 }, "name": "Google: Gemini 2.5 Pro", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "kimi-dev-72b", "name": "MoonshotAI: Kimi Dev 72B", "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks. Based on Qwen2.5-72B, it is optimized using large-scale reinforcement learning that applies code patches in real repositories and validates them via full test suite execution—rewarding only correct, robust completions. 
The model achieves 60.4% on SWE-bench Verified, setting a new benchmark among open-source models for software bug fixing and code reasoning.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.29, "currency": "USD" }, "output": { "per_million_tokens": 1.15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "moonshotai/kimi-dev-72b", "created": 1750115909 } }, { "id": "o3-pro", "description": "o3-pro\nThis model only supports requests through the Responses API. Its thinking time is relatively long, so responses will be slow.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 80, "currency": "USD" }, "cache_read": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o3-pro-2025-06-10", "created": 1749598352 }, "name": "OpenAI: o3 Pro", "owned_by": "openrouter" }, { "id": "grok-3-mini", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.501, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "x-ai/grok-3-mini", "created": 1749583245 }, "name": "xAI: Grok 3 Mini", "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "grok-3", "description": "Grok's latest model", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "Grok" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "x-ai/grok-3", "created": 1749582908 }, "name": "xAI: Grok 3", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "gemini-2.5-pro-preview", "name": "Google: Gemini 2.5 Pro Preview 06-05", "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. 
It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION", "AUDIO_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT", "AUDIO" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-pro-preview-06-05", "created": 1749137257 } }, { "id": "deepseek-r1-0528-qwen3-8b", "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and Gemini 2.5 Pro.\nIt now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.\nThe distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8 B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235 B “thinking” giant on AIME 2024.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1-0528-qwen3-8b", "created": 1748538543 } }, { "id": "deepseek-r1-0528", "name": "DeepSeek: R1 0528", "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1). Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. 
It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 1.75, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1-0528", "created": 1748455170 } }, { "id": "claude-opus-4", "name": "Anthropic: Claude Opus 4", "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4-opus-20250522", "created": 1747931245 } }, { "id": "claude-sonnet-4", "name": "Anthropic: Claude Sonnet 4", "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. 
Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-4-sonnet-20250522", "created": 1747930371 } }, { "id": "devstral-small-2505", "name": "Mistral: Devstral Small 2505", "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/devstral-small-2505", "created": 1747837379 } }, { "id": "gemma-3n-e4b-it:free", "name": "Google: Gemma 3n 4B (free)", "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. 
[Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 2048, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-3n-e4b-it", "created": 1747776824 } }, { "id": "gemma-3n-e4b-it", "description": "Gemma 3n is a generative AI model optimized for use in everyday devices, such as phones, laptops, and tablets. This model includes innovations in parameter-efficient processing, including Per-Layer Embedding (PLE) parameter caching and a MatFormer model architecture that provides the flexibility to reduce compute and memory requirements. These models feature audio input handling, as well as text and visual data.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-3n-e4b-it", "created": 1747776824 }, "name": "Google: Gemma 3n 4B", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "codex-mini", "name": "OpenAI: Codex Mini", "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.375, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/codex-mini", "created": 1747409761 } }, { "id": "deephermes-3-mistral-24b-preview", "name": "Nous: DeepHermes 3 Mistral 24B Preview", "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning. It introduces a dual-mode system that toggles between intuitive chat responses and structured “deep reasoning” mode using special system prompts. Fine-tuned via distillation from R1, it supports structured output (JSON mode) and function call syntax for agent-based applications.\n\nDeepHermes 3 supports a **reasoning toggle via system prompt**, allowing users to switch between fast, intuitive responses and deliberate, multi-step reasoning. When activated with the following specific system instruction, the model enters a *\"deep thinking\"* mode—generating extended chains of thought wrapped in `<think>` tags before delivering a final answer. 
\n\nSystem Prompt: You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside <think> </think> tags, and then provide your solution or response to the problem.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/deephermes-3-mistral-24b-preview", "created": 1746830904 } }, { "id": "mistral-medium-3", "name": "Mistral: Mistral Medium 3", "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-medium-3", "created": 1746627341 } }, { "id": "gemini-2.5-pro-preview-05-06", "description": "gemini-2.5-pro latest model", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "long_context" ], "original_types": "llm", "original_features": "thinking,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.5-pro-preview-03-25", "created": 1746578513 }, "name": "Google: Gemini 2.5 Pro Preview 05-06", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "spotlight", "name": "Arcee AI: Spotlight", "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for 
tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question‑answering, and diagram‑analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock‑ups need to be interpreted on the fly. Early benchmarks show it matching or out‑scoring larger VLMs such as LLaVA‑1.6 13 B on popular VQA and POPE alignment tests. ", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 65537, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "arcee-ai/spotlight", "created": 1746481552 } }, { "id": "maestro-reasoning", "name": "Arcee AI: Maestro Reasoning", "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass‑rate on MATH and GSM‑8K, while also lifting code completion accuracy. Its instruction style encourages structured \"thought → answer\" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit‑focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi‑constraint queries that smaller SLMs bounce. ", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.8999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 3.3000000000000003, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arcee-ai/maestro-reasoning", "created": 1746481269 } }, { "id": "virtuoso-large", "name": "Arcee AI: Virtuoso Large", "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. 
Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.75, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arcee-ai/virtuoso-large", "created": 1746478885 } }, { "id": "coder-large", "name": "Arcee AI: Coder Large", "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file refactoring or long diff review in a single call, and understands 30‑plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5–8 pt gains over CodeLlama‑34 B‑Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost‑wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. ", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.7999999999999999, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arcee-ai/coder-large", "created": 1746478663 } }, { "id": "phi-4-reasoning-plus", "name": "Microsoft: Phi 4 Reasoning Plus", "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\n\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. 
Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.35, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/phi-4-reasoning-plus-04-30", "created": 1746130961 } }, { "id": "mercury-coder", "name": "Inception: Mercury Coder", "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "inception/mercury-coder-small-beta", "created": 1746033880 } }, { "id": "qwen3-4b:free", "name": "Qwen: Qwen3 4B (free)", "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks. It introduces a dual-mode architecture—thinking and non-thinking—allowing dynamic switching between high-precision logical reasoning and efficient dialogue generation. This makes it well-suited for multi-turn chat, instruction following, and complex agent workflows.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-4b-04-28", "created": 1746031104 } }, { "id": "deepseek-prover-v2", "name": "DeepSeek: DeepSeek Prover V2", "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. 
Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL). Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 2.18, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-prover-v2", "created": 1746013094 } }, { "id": "llama-guard-4-12b", "name": "Meta: Llama Guard 4 12B", "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "meta-llama/llama-guard-4-12b", "created": 1745975193 } }, { "id": "qwen3-30b-a3b", "description": "Achieves effective integration of thinking and non-thinking modes, allowing mode switching during conversations. 
Its reasoning ability matches that of QwQ-32B with a smaller parameter size, and its general capability significantly surpasses Qwen2.5-14B, reaching state-of-the-art (SOTA) levels among industry models of the same scale.", "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-30b-a3b-04-28", "created": 1745878604 }, "name": "Qwen: Qwen3 30B A3B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "max_output_tokens": 40960 }, { "id": "qwen3-8b", "description": "Achieves effective integration of thinking and non-thinking modes, enabling mode switching during conversations. Its reasoning ability reaches state-of-the-art (SOTA) levels among models of the same scale, and its general capability significantly surpasses Qwen2.5-7B.", "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-8b-04-28", "created": 1745876632 }, "name": "Qwen: Qwen3 8B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 20000 }, { "id": "qwen3-14b", "description": "Achieves effective integration of thinking and non-thinking modes, enabling mode switching during conversations. Its reasoning ability reaches state-of-the-art (SOTA) levels among models of the same scale, and its general capability significantly surpasses Qwen2.5-14B.", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-14b-04-28", "created": 1745876478 }, "name": "Qwen: Qwen3 14B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "max_output_tokens": 40960 }, { "id": "qwen3-32b", "description": "Achieves effective integration of thinking and non-thinking modes, allowing mode switching during conversations. 
Its reasoning ability significantly surpasses QwQ, and its general capability significantly exceeds Qwen2.5-32B-Instruct, reaching state-of-the-art (SOTA) levels among industry models of the same scale.", "pricing": { "input": { "per_million_tokens": 0.32, "currency": "USD" }, "output": { "per_million_tokens": 3.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-32b-04-28", "created": 1745875945 }, "name": "Qwen: Qwen3 32B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "max_output_tokens": 40960 }, { "id": "qwen3-235b-a22b:free", "name": "Qwen: Qwen3 235B A22B (free)", "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-235b-a22b-04-28", "created": 1745875757 } }, { "id": "qwen3-235b-a22b", "description": "Qwen3-235B-A22B is a massive 235B parameter Mixture-of-Experts (MoE) model that operates with the efficiency of a 22B model. Its standout feature is the ability to seamlessly switch between a \"thinking\" mode for complex reasoning and a \"non-thinking\" mode for fast conversation, offering both world-class power and practical speed.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 131100, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 1.12, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen3-235b-a22b-04-28", "created": 1745875757 }, "name": "Qwen: Qwen3 235B A22B", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "deepseek-r1t-chimera:free", "name": "TNG: DeepSeek R1T Chimera (free)", "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. 
It is based on a DeepSeek-MoE Transformer architecture and is optimized for general text generation tasks.\n\nThe model merges pretrained weights from both source models to balance performance across reasoning, efficiency, and instruction-following tasks. It is released under the MIT license and intended for research and commercial use.", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/deepseek-r1t-chimera", "created": 1745760875 } }, { "id": "deepseek-r1t-chimera", "name": "TNG: DeepSeek R1T Chimera", "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on a DeepSeek-MoE Transformer architecture and is optimized for general text generation tasks.\n\nThe model merges pretrained weights from both source models to balance performance across reasoning, efficiency, and instruction-following tasks. It is released under the MIT license and intended for research and commercial use.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "tngtech/deepseek-r1t-chimera", "created": 1745760875 } }, { "id": "mai-ds-r1", "name": "Microsoft: MAI DS R1", "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile. Built on top of DeepSeek-R1’s reasoning foundation, it integrates 110k examples from the Tulu-3 SFT dataset and 350k internally curated multilingual safety-alignment samples. The model retains strong reasoning, coding, and problem-solving capabilities, while unblocking a wide range of prompts previously restricted in R1.\n\nMAI-DS-R1 demonstrates improved performance on harm mitigation benchmarks and maintains competitive results across general reasoning tasks. It surpasses R1-1776 in satisfaction metrics for blocked queries and reduces leakage in harmful content categories. 
The model is based on a transformer MoE architecture and is suitable for general-purpose use cases, excluding high-stakes domains such as legal, medical, or autonomous systems.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/mai-ds-r1", "created": 1745194100 } }, { "id": "o4-mini-high", "name": "OpenAI: o4 Mini High", "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.275, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o4-mini-high-2025-04-16", "created": 1744824212 } }, { "id": "o3", "description": "OpenAI o3 is a powerful model across multiple domains, setting a new standard for coding, math, science, and visual reasoning tasks.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o3-2025-04-16", "created": 1744823457 }, "name": "OpenAI: o3", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "o4-mini", "description": 
"o4-mini is a remarkably smart model for its speed and cost-efficiency. This allows it to support significantly higher usage limits than o3, making it a strong high-volume, high-throughput option for everyone with questions that benefit from reasoning.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.275, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tool", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tool,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o4-mini-2025-04-16", "created": 1744820942 }, "name": "OpenAI: o4 Mini", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen2.5-coder-7b-instruct", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen2.5-coder-7b-instruct", "created": 1744734887 }, "name": "Qwen: Qwen2.5 Coder 7B Instruct", "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "gpt-4.1", "description": "The latest flagship multimodal model supports million-token context, with encoding capability (SWE-bench 54.6%) and instruction-following (Scale AI 38.3%) performance significantly surpassing GPT-4o, while reducing costs by 26%, making it suitable for complex tasks. 
Its automatic caching mechanism offers a 75% cost reduction on cache hits.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1047576, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4.1-2025-04-14", "created": 1744651385 }, "name": "OpenAI: GPT-4.1", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-4.1-mini", "description": "Lightweight, high-performance model with million-token context and near-flagship-level encoding and image understanding capabilities, while reducing costs by 83%. It is suitable for rapid development and small to medium-sized applications. The automatic caching mechanism provides a 75% cost reduction on cache hits.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1047576, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4.1-mini-2025-04-14", "created": 1744651381 }, "name": "OpenAI: GPT-4.1 Mini", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-4.1-nano", "description": "Ultra-lightweight model with million-token context, optimized for speed and low latency, costing only $0.10 per million input tokens. It is suitable for edge computing and real-time interaction. The automatic caching mechanism offers a 75% cost reduction on cache hits.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1047576, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.025, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4.1-nano-2025-04-14", "created": 1744651369 }, "name": "OpenAI: GPT-4.1 Nano", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "llemma_7b", "name": "EleutherAI: Llemma 7b", "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. 
Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 0.7999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "eleutherai/llemma_7b", "created": 1744643225 } }, { "id": "codellama-7b-instruct-solidity", "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 0.7999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "alfredpros/codellama-7b-instruct-solidity", "created": 1744641874 } }, { "id": "qwq-32b-arliai-rpr-v1", "name": "ArliAI: QwQ 32B RpR v1", "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series. It is designed to maintain coherence and reasoning across long multi-turn conversations by introducing explicit reasoning steps per dialogue turn, generated and refined using the base model itself.\n\nThe model was trained using RS-QLORA+ on 8K sequence lengths and supports up to 128K context windows (with practical performance around 32K). It is optimized for creative roleplay and dialogue generation, with an emphasis on minimizing cross-context repetition while preserving stylistic diversity.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "arliai/qwq-32b-arliai-rpr-v1", "created": 1744555982 } }, { "id": "grok-3-mini-beta", "description": "This model ID with beta has been officially taken offline. 
Requests to grok-3-mini-beta are automatically routed to grok-3-mini.", "pricing": { "input": { "per_million_tokens": 0.33, "currency": "USD" }, "output": { "per_million_tokens": 0.5511, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "tools", "Grok" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "x-ai/grok-3-mini-beta", "created": 1744240195 }, "name": "xAI: Grok 3 Mini Beta", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "grok-3-beta", "description": "Grok's latest model.\nThe beta model ID has been officially taken offline; requests to grok-3-beta are automatically routed to grok-3.", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "tools", "Grok" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "x-ai/grok-3-beta", "created": 1744240068 }, "name": "xAI: Grok 3 Beta", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "llama-3.1-nemotron-ultra-253b-v1", "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 1.7999999999999998, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", "created": 1744115059 } }, { "id": "llama-4-maverick", "description": "Llama 4 Maverick is a high-capacity Mixture-of-Experts (MoE) model from Meta, featuring 400B total parameters and 128 experts, while activating an efficient 17B parameters per inference. Engineered for peak performance, it excels at advanced multimodal tasks.\n\nMaverick natively supports text and image input, producing multilingual text and code. 
With a 1-million-token context window and instruction tuning, it is optimized for complex image reasoning and general-purpose assistant-like interactions.\n\nReleased under the Llama 4 Community License, Maverick is ideal for research and commercial applications demanding state-of-the-art multimodal understanding and high throughput.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1048576, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "meta-llama/llama-4-maverick-17b-128e-instruct", "created": 1743881822 }, "name": "Meta: Llama 4 Maverick", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "llama-4-scout", "description": "Llama 4 Scout is a highly efficient Mixture-of-Experts (MoE) model from Meta, activating 17B out of 109B total parameters per inference. It natively supports multimodal input (text and image) and multilingual output (text and code) across 12 languages.\n\nDesigned for assistant-style interaction and visual reasoning, Scout features a massive 10-million-token context window. It is instruction-tuned for tasks like multilingual chat and image understanding and is released under the Llama 4 Community License for local or commercial deployment.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 131000, "max_output_tokens": 131000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "meta-llama/llama-4-scout-17b-16e-instruct", "created": 1743881519 }, "name": "Meta: Llama 4 Scout", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "qwen2.5-vl-32b-instruct", "name": "Qwen: Qwen2.5 VL 32B Instruct", "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. 
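As a minimal sketch of how an image might be passed to this model (assuming an OpenAI-compatible chat endpoint; the gateway base URL and image URL below are placeholders, not part of this catalog):

import OpenAI from 'openai';
// Hypothetical OpenAI-compatible gateway; substitute your provider's base URL and key.
const client = new OpenAI({ baseURL: 'https://example-gateway/v1' });

const completion = await client.chat.completions.create({
  model: 'qwen2.5-vl-32b-instruct',
  messages: [{
    role: 'user',
    content: [
      { type: 'text', text: 'Which event is shown in this image?' },
      // Placeholder image URL for illustration only.
      { type: 'image_url', image_url: { url: 'https://example.com/photo.jpg' } },
    ],
  }],
});
console.log(completion.choices[0].message.content);
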
Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.22, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen2.5-vl-32b-instruct", "created": 1742839838 } }, { "id": "deepseek-chat-v3-0324", "name": "DeepSeek: DeepSeek V3 0324", "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 7168, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.7, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "reasoning", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-chat-v3-0324", "created": 1742824755 } }, { "id": "o1-pro", "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 170, "currency": "USD" }, "output": { "per_million_tokens": 680, "currency": "USD" }, "cache_read": { "per_million_tokens": 170, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o1-pro", "created": 1742423211 }, "name": "OpenAI: o1-pro", "owned_by": "openrouter", "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000 }, { "id": "mistral-small-3.1-24b-instruct:free", "name": "Mistral: Mistral Small 3.1 24B (free)", "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. 
The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-small-3.1-24b-instruct-2503", "created": 1742238937 } }, { "id": "mistral-small-3.1-24b-instruct", "name": "Mistral: Mistral Small 3.1 24B", "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/mistral-small-3.1-24b-instruct-2503", "created": 1742238937 } }, { "id": "olmo-2-0325-32b-instruct", "name": "AllenAI: Olmo 2 32B Instruct", "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation. Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "allenai/olmo-2-0325-32b-instruct", "created": 1741988556 } }, { "id": "gemma-3-4b-it:free", "name": "Google: Gemma 3 4B (free)", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. 
It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 8192, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-4b-it", "created": 1741905510 } }, { "id": "gemma-3-4b-it", "description": "Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-4b-it", "created": 1741905510 }, "name": "Google: Gemma 3 4B", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 96000 }, { "id": "gemma-3-12b-it:free", "name": "Google: Gemma 3 12B (free)", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 8192, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-12b-it", "created": 1741902625 } }, { "id": "gemma-3-12b-it", "description": "Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning. 
Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-12b-it", "created": 1741902625 }, "name": "Google: Gemma 3 12B", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072 }, { "id": "command-a", "name": "Cohere: Command A", "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.\nCompared to other leading proprietary and open-weights models Command A delivers maximum performance with minimum hardware costs, excelling on business-critical agentic and multilingual tasks.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "cohere/command-a-03-2025", "created": 1741894342 } }, { "id": "gpt-4o-mini-search-preview", "description": "Using the Chat Completions API, you can directly access the fine-tuned models and tool used by Search in ChatGPT.\n\nWhen using Chat Completions, the model always retrieves information from the web before responding to your query. To use web_search_preview as a tool that models like gpt-4o and gpt-4o-mini invoke only when necessary, switch to using the Responses API.\n\nCurrently, you need to use one of these models to use web search in Chat Completions:\n\ngpt-4o-search-preview\ngpt-4o-mini-search-preview\nWeb search parameter example\nimport OpenAI from \"openai\";\nconst client = new OpenAI();\n\nconst completion = await client.chat.completions.create({\n model: \"gpt-4o-search-preview\",\n web_search_options: {},\n messages: [{\n \"role\": \"user\",\n \"content\": \"What was a positive news story from today?\"\n }],\n});\n\nconsole.log(completion.choices[0].message.content);\nOutput and citations\nThe API response item in the choices array will include:\n\nmessage.content with the text result from the model, inclusive of any inline citations\nannotations with a list of cited URLs\nBy default, the model's response will include inline citations for URLs found in the web search results. 
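A brief sketch of iterating over those citation annotations (continuing the example above; the url_citation fields are detailed just below):

const message = completion.choices[0].message;
for (const annotation of message.annotations ?? []) {
  if (annotation.type === 'url_citation') {
    const { url, title, start_index, end_index } = annotation.url_citation;
    // start_index and end_index locate the cited span inside message.content.
    console.log(title, url, start_index, end_index);
  }
}
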
In addition to this, the url_citation annotation object will contain the URL and title of the cited source, as well as the start and end index characters in the model's response where those sources were used.", "capabilities": [ "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "web", "function_calling", "structured_outputs" ], "original_types": "llm,search", "original_features": "web,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4o-mini-search-preview-2025-03-11", "created": 1741818122 }, "name": "OpenAI: GPT-4o-mini Search Preview", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-4o-search-preview", "description": "Using the Chat Completions API, you can directly access the fine-tuned models and tool used by Search in ChatGPT.\n\nWhen using Chat Completions, the model always retrieves information from the web before responding to your query. To use web_search_preview as a tool that models like gpt-4o and gpt-4o-mini invoke only when necessary, switch to using the Responses API.\n\nCurrently, you need to use one of these models to use web search in Chat Completions:\n\ngpt-4o-search-preview\ngpt-4o-mini-search-preview\nWeb search parameter example\nimport OpenAI from \"openai\";\nconst client = new OpenAI();\n\nconst completion = await client.chat.completions.create({\n model: \"gpt-4o-search-preview\",\n web_search_options: {},\n messages: [{\n \"role\": \"user\",\n \"content\": \"What was a positive news story from today?\"\n }],\n});\n\nconsole.log(completion.choices[0].message.content);\nOutput and citations\nThe API response item in the choices array will include:\n\nmessage.content with the text result from the model, inclusive of any inline citations\nannotations with a list of cited URLs\nBy default, the model's response will include inline citations for URLs found in the web search results. In addition to this, the url_citation annotation object will contain the URL and title of the cited source, as well as the start and end index characters in the model's response where those sources were used.", "capabilities": [ "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "web", "function_calling", "structured_outputs" ], "original_types": "llm,search", "original_features": "web,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4o-search-preview-2025-03-11", "created": 1741817949 }, "name": "OpenAI: GPT-4o Search Preview", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gemma-3-27b-it:free", "name": "Google: Gemma 3 27B (free)", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. 
It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-27b-it", "created": 1741756359 } }, { "id": "gemma-3-27b-it", "description": "Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-27b-it", "created": 1741756359 }, "name": "Google: Gemma 3 27B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 96000, "max_output_tokens": 96000 }, { "id": "skyfall-36b-v2", "name": "TheDrummer: Skyfall 36B V2", "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.55, "currency": "USD" }, "output": { "per_million_tokens": 0.7999999999999999, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "thedrummer/skyfall-36b-v2", "created": 1741636566 } }, { "id": "phi-4-multimodal-instruct", "name": "Microsoft: Phi 4 Multimodal Instruct", "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate text outputs. The unified architecture enables efficient, low-latency inference, suitable for edge and mobile deployments. 
Phi-4 Multimodal Instruct supports text inputs in multiple languages including Arabic, Chinese, English, French, German, Japanese, Spanish, and more, with visual input optimized primarily for English. It delivers impressive performance on multimodal tasks involving mathematical, scientific, and document reasoning, providing developers and enterprises a powerful yet compact model for sophisticated interactive applications. For more information, see the [Phi-4 Multimodal blog post](https://azure.microsoft.com/en-us/blog/empowering-innovation-the-next-generation-of-the-phi-family/).\n", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "microsoft/phi-4-multimodal-instruct", "created": 1741396284 } }, { "id": "sonar-reasoning-pro", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "include_reasoning", "reasoning", "web_search_options" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "perplexity/sonar-reasoning-pro", "created": 1741313308 }, "name": "Perplexity: Sonar Reasoning Pro", "description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nSonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). Designed for advanced use cases, it supports in-depth, multi-step queries with a larger context window and can surface more citations per search, enabling more comprehensive and extensible responses.", "owned_by": "openrouter", "capabilities": [ "REASONING", "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 128000 }, { "id": "sonar-pro", "name": "Perplexity: Sonar Pro", "description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nFor enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like double the number of citations per search as Sonar on average. Plus, with a larger context window, it can handle longer and more nuanced searches and follow-up questions. 
", "owned_by": "openrouter", "capabilities": [ "WEB_SEARCH", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "web_search_options" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "perplexity/sonar-pro", "created": 1741312423 } }, { "id": "sonar-deep-research", "name": "Perplexity: Sonar Deep Research", "description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers information. This enables comprehensive report generation across domains like finance, technology, health, and current events.\n\nNotes on Pricing ([Source](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-deep-research)) \n- Input tokens comprise of Prompt tokens (user prompt) + Citation tokens (these are processed tokens from running searches)\n- Deep Research runs multiple searches to conduct exhaustive research. Searches are priced at $5/1000 searches. A request that does 30 searches will cost $0.15 in this step.\n- Reasoning is a distinct step in Deep Research since it does extensive automated reasoning through all the material it gathers during its research phase. Reasoning tokens here are a bit different than the CoTs in the answer - these are tokens that we use to reason through the research material prior to generating the outputs via the CoTs. Reasoning tokens are priced at $3/1M tokens", "owned_by": "openrouter", "capabilities": [ "REASONING", "WEB_SEARCH" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "web_search_options" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "perplexity/sonar-deep-research", "created": 1741311246 } }, { "id": "qwq-32b", "description": "The model provider is the Sophnet platform. 
QwQ is a reasoning model from the Qwen (Qianwen) series, featuring outstanding thinking and reasoning capabilities.\nCompared with traditional instruction-tuned models, QwQ achieves significantly better performance on downstream tasks, especially on difficult problems.\nQwQ-32B is a medium-sized reasoning model that delivers performance competitive with state-of-the-art reasoning models such as DeepSeek-R1 and o1-mini.\nIt supports context lengths of up to 128K tokens and can generate up to 128K tokens of text.", "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 0.84, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwq-32b", "created": 1741208814 }, "name": "Qwen: QwQ 32B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "gemini-2.0-flash-lite-001", "description": "Google Gemini's enterprise version VertexAI", "pricing": { "input": { "per_million_tokens": 0.076, "currency": "USD" }, "output": { "per_million_tokens": 0.304, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.076, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.0-flash-lite-001", "created": 1740506212 }, "name": "Google: Gemini 2.0 Flash Lite", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION", "AUDIO_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 8192 }, { "id": "claude-3.7-sonnet:thinking", "name": "Anthropic: Claude 3.7 Sonnet (thinking)", "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. 
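As a minimal sketch of choosing between the two modes (assuming this catalog is served through an OpenAI-compatible gateway, which this document does not specify; the base URL is a placeholder):

import OpenAI from 'openai';
// Hypothetical gateway endpoint.
const client = new OpenAI({ baseURL: 'https://example-gateway/v1' });

// The plain 'claude-3.7-sonnet' ID favors rapid responses, while the
// ':thinking' ID listed here selects the extended step-by-step mode.
const completion = await client.chat.completions.create({
  model: 'claude-3.7-sonnet:thinking',
  messages: [{ role: 'user', content: 'Plan the database migration in numbered steps.' }],
});
console.log(completion.choices[0].message.content);
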
\n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-7-sonnet-20250219", "created": 1740422110 } }, { "id": "claude-3.7-sonnet", "name": "Anthropic: Claude 3.7 Sonnet", "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "include_reasoning", "reasoning", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-7-sonnet-20250219", "created": 1740422110 } }, { "id": "mistral-saba", "name": "Mistral: Saba", "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. 
Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-saba-2502", "created": 1739803239 } }, { "id": "llama-guard-3-8b", "name": "Llama Guard 3 8B", "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-guard-3-8b", "created": 1739401318 } }, { "id": "o3-mini-high", "name": "OpenAI: o3 Mini High", "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. 
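A minimal sketch of selecting an effort level explicitly (assuming the standard OpenAI client; o3-mini-high is simply o3-mini with reasoning_effort pinned to high):

import OpenAI from 'openai';
const client = new OpenAI();

const completion = await client.chat.completions.create({
  model: 'o3-mini',
  reasoning_effort: 'high', // 'low' | 'medium' | 'high'; equivalent to the o3-mini-high ID
  messages: [{ role: 'user', content: 'Prove that the sum of two even integers is even.' }],
});
console.log(completion.choices[0].message.content);
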
With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.55, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/o3-mini-high-2025-01-31", "created": 1739372611 } }, { "id": "gemini-2.0-flash-001", "description": "Google Gemini's enterprise version VertexAI", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.0-flash-001", "created": 1738769413 }, "name": "Google: Gemini 2.0 Flash", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION", "AUDIO_RECOGNITION", "VIDEO_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 8192 }, { "id": "qwen-vl-plus", "name": "Qwen: Qwen VL Plus", "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 7500, "max_output_tokens": 1500, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 0.63, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "Qwen" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen-vl-plus", "created": 1738731255 } }, { "id": "aion-1.0", "name": "AionLabs: Aion-1.0", "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). 
It is AionLabs' most powerful reasoning model.", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "aion-labs/aion-1.0", "created": 1738697557 } }, { "id": "aion-1.0-mini", "name": "AionLabs: Aion-1.0-Mini", "description": "The Aion-1.0-Mini 32B-parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results available on its [Hugging Face page](https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview), independently replicated for verification.", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 1.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "aion-labs/aion-1.0-mini", "created": 1738697107 } }, { "id": "aion-rp-llama-3.1-8b", "name": "AionLabs: Aion-RP 1.0 (8B)", "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "aion-labs/aion-rp-llama-3.1-8b", "created": 1738696718 } }, { "id": "qwen-vl-max", "name": "Qwen: Qwen VL Max", "description": "Qwen VL Max is a visual understanding model with a 7,500-token context length. 
It excels in delivering optimal performance for a broader spectrum of complex tasks.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 0.7999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 3.1999999999999997, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen-vl-max-2025-01-25", "created": 1738434304 } }, { "id": "qwen-turbo", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.36, "currency": "USD" }, "output": { "per_million_tokens": 1.08, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "long_context" ], "original_types": "llm", "original_features": "long_context", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-turbo-2024-11-01", "created": 1738410974 }, "name": "Qwen: Qwen-Turbo", "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "max_output_tokens": 8192 }, { "id": "qwen2.5-vl-72b-instruct", "description": "The model provider is the Sophon platform. Qwen2.5-VL-72B-Instruct is the latest vision-language model released by the Qwen team. This model excels not only at recognizing common objects such as flowers, birds, fish, and insects, but also at efficiently analyzing text, charts, icons, graphics, and layouts within images. As a visual agent, it is capable of reasoning and dynamically guiding tool usage, supporting both computer and mobile operations. 
Moreover, it can understand videos longer than one hour and accurately locate relevant video segments.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.62, "currency": "USD" }, "output": { "per_million_tokens": 0.62, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen2.5-vl-72b-instruct", "created": 1738410311 }, "name": "Qwen: Qwen2.5 VL 72B Instruct", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768 }, { "id": "qwen-plus", "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 2.1, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-plus-2025-01-25", "created": 1738409840 }, "name": "Qwen: Qwen-Plus", "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 8192 }, { "id": "qwen-max", "pricing": { "input": { "per_million_tokens": 0.38, "currency": "USD" }, "output": { "per_million_tokens": 1.52, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.64, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-max-2025-01-25", "created": 1738402289 }, "name": "Qwen: Qwen-Max ", "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion tokens and further post-trained with curated Supervised Fine-Tuning (SFT) and Reinforcement Learning from Human Feedback (RLHF) methodologies. The parameter count is unknown.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 8192 }, { "id": "o3-mini", "description": "OpenAI's latest fast reasoning model excels at STEM tasks and offers exceptional cost-effectiveness. 
Official support for cache hits reduces input prices by half.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 100000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.55, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/o3-mini-2025-01-31", "created": 1738351721 }, "name": "OpenAI: o3 Mini", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "mistral-small-24b-instruct-2501", "name": "Mistral: Mistral Small 3", "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-small-24b-instruct-2501", "created": 1738255409 } }, { "id": "deepseek-r1-distill-qwen-32b", "description": "The model provider is the Sophnet platform. 
Deepseek-R1-Distill-Qwen-32B is a knowledge-distilled large language model based on Qwen 2.5 32B and trained using outputs from DeepSeek R1.\nDeepSeek-R1 addresses issues such as infinite repetition, poor readability, and language mixing by introducing cold-start data before reinforcement learning.\nDeepSeek-R1’s performance in mathematics, programming, and reasoning tasks is comparable to OpenAI-o1.\nTo support the research community, we have open-sourced DeepSeek-R1-Zero, DeepSeek-R1, and six dense models based on Llama and Qwen.\nDeepSeek-R1-Distill-Qwen-32B outperforms OpenAI-o1-mini on multiple benchmark tests, setting new state-of-the-art results for dense models.", "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 0.84, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1-distill-qwen-32b", "created": 1738194830 }, "name": "DeepSeek: R1 Distill Qwen 32B", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 64000, "max_output_tokens": 32000 }, { "id": "deepseek-r1-distill-qwen-14b", "name": "DeepSeek: R1 Distill Qwen 14B", "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "include_reasoning", "reasoning", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1-distill-qwen-14b", "created": 1738193940 } }, { "id": "sonar-reasoning", "description": "Perplexity inference model", "pricing": { "input": { "per_million_tokens": 1.6, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "include_reasoning", "reasoning", "web_search_options" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "perplexity/sonar-reasoning", "created": 1738131107 }, "name": "Perplexity: Sonar Reasoning", "owned_by": "openrouter", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 127000 }, { "id": "sonar", "description": "Latest Perplexity Model", "pricing": { "input": { "per_million_tokens": 1.6, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", 
"web_search_options" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "perplexity/sonar", "created": 1738013808 }, "name": "Perplexity: Sonar", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 127072 }, { "id": "deepseek-r1-distill-llama-70b", "description": "Provided by Groq, the DeepSeek-R1-Distill model is fine-tuned based on an open-source model, using samples generated by DeepSeek-R1. We have made slight modifications to their configurations and tokenizers. Please use our settings to run these models.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1-distill-llama-70b", "created": 1737663169 }, "name": "DeepSeek: R1 Distill Llama 70B", "owned_by": "openrouter", "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 131072 }, { "id": "deepseek-r1", "description": "DeepSeek R1 is a new open-source model with performance on par with OpenAI's o1 and features fully open reasoning tokens. It is a 671B-parameter Mixture-of-Experts (MoE) model that activates 37B parameters during inference.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 1638000, "max_output_tokens": 1638000, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-r1", "created": 1737381095 }, "name": "DeepSeek: R1", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "minimax-01", "name": "MiniMax: MiniMax-01", "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). 
The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 1000192, "max_output_tokens": 1000192, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 1.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "minimax/minimax-01", "created": 1736915462 } }, { "id": "phi-4", "name": "Microsoft: Phi 4", "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/phi-4", "created": 1736489872 } }, { "id": "l3.1-70b-hanami-x1", "name": "Sao10K: Llama 3.1 70B Hanami x1", "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "sao10k/l3.1-70b-hanami-x1", "created": 1736302854 } }, { "id": "deepseek-chat", "name": "DeepSeek: DeepSeek V3", "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions.
Pre-trained on nearly 15 trillion tokens, the model outperforms other open-source models and rivals leading closed-source models in reported evaluations.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "max_output_tokens": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "DeepSeek" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "deepseek/deepseek-chat-v3", "created": 1735241320 } }, { "id": "l3.3-euryale-70b", "name": "Sao10K: Llama 3.3 Euryale 70B", "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.65, "currency": "USD" }, "output": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "sao10k/l3.3-euryale-70b-v2.3", "created": 1734535928 } }, { "id": "o1", "description": "OpenAI's most powerful o-series model; it supports official prompt caching, and cache hits halve the input cost.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" }, "cache_read": { "per_million_tokens": 7.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/o1-2024-12-17", "created": 1734459999 }, "name": "OpenAI: o1", "owned_by": "openrouter", "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 100000 }, { "id": "command-r7b-12-2024", "name": "Cohere: Command R7B (12-2024)", "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024.
It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000, "pricing": { "input": { "per_million_tokens": 0.0375, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Cohere" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "cohere/command-r7b-12-2024", "created": 1734158152 } }, { "id": "gemini-2.0-flash-exp:free", "name": "Google: Gemini 2.0 Flash Experimental (free)", "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "max_output_tokens": 8192, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemini-2.0-flash-exp", "created": 1733937523 } }, { "id": "llama-3.3-70b-instruct:free", "name": "Meta: Llama 3.3 70B Instruct (free)", "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.3-70b-instruct", "created": 1733506137 } }, { "id": "llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). 
The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 120000, "pricing": { "input": { "per_million_tokens": 0.108, "currency": "USD" }, "output": { "per_million_tokens": 0.32, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.3-70b-instruct", "created": 1733506137 } }, { "id": "nova-lite-v1", "name": "Amazon: Nova Lite 1.0", "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon, focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 300000, "max_output_tokens": 5120, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.24, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Nova" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "amazon/nova-lite-v1", "created": 1733437363 } }, { "id": "nova-micro-v1", "name": "Amazon: Nova Micro 1.0", "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 5120, "pricing": { "input": { "per_million_tokens": 0.035, "currency": "USD" }, "output": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Nova" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "amazon/nova-micro-v1", "created": 1733437237 } }, { "id": "nova-pro-v1", "name": "Amazon: Nova Pro 1.0", "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks.
As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and in analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 300000, "max_output_tokens": 5120, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 3.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Nova" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "amazon/nova-pro-v1", "created": 1733436303 } }, { "id": "gpt-4o-2024-11-20", "description": "The latest version of the GPT-4o model; this version is recommended, as it is currently more capable than the base GPT-4o.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o-2024-11-20", "created": 1732127594 }, "name": "OpenAI: GPT-4o (2024-11-20)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ] }, { "id": "mistral-large-2411", "name": "Mistral Large 2411", "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411).\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-large-2411", "created": 1731978685 } }, { "id": "mistral-large-2407", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-large-2407", "created": 1731978415 }, "name": "Mistral Large 2407", "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more.
Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "pixtral-large-2411", "name": "Mistral: Pixtral Large 2411", "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/pixtral-large-2411", "created": 1731977388 } }, { "id": "qwen-2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B Instruct", "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significant improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**.
It not only enhances coding capabilities but also maintains its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-2.5-coder-32b-instruct", "created": 1731368400 } }, { "id": "sorcererlm-8x22b", "name": "SorcererLM 8x22B", "description": "SorcererLM is an advanced RP and storytelling model, built as a low-rank 16-bit LoRA fine-tune of [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16000, "pricing": { "input": { "per_million_tokens": 4.5, "currency": "USD" }, "output": { "per_million_tokens": 4.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "raifle/sorcererlm-8x22b", "created": 1731105083 } }, { "id": "unslopnemo-12b", "name": "TheDrummer: UnslopNemo 12B", "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "thedrummer/unslopnemo-12b", "created": 1731103448 } }, { "id": "claude-3.5-haiku-20241022", "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions.
It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-5-haiku-20241022", "created": 1730678400 } }, { "id": "claude-3.5-haiku", "name": "Anthropic: Claude 3.5 Haiku", "description": "Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-5-haiku", "created": 1730678400 } }, { "id": "magnum-v4-72b", "name": "Magnum v4 72B", "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically [Sonnet](https://openrouter.ai/anthropic/claude-3.5-sonnet) and [Opus](https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "max_output_tokens": 2048, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "anthracite-org/magnum-v4-72b", "created": 1729555200 } }, { "id": "claude-3.5-sonnet", "name": "Anthropic: Claude 3.5 Sonnet", "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices.
Sonnet is particularly good at:\n\n- Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3.5-sonnet", "created": 1729555200 } }, { "id": "ministral-8b", "name": "Mistral: Ministral 8B", "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/ministral-8b", "created": 1729123200 } }, { "id": "ministral-3b", "name": "Mistral: Ministral 3B", "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.04, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/ministral-3b", "created": 1729123200 } }, { "id": "qwen-2.5-7b-instruct", "name": "Qwen: Qwen2.5 7B Instruct", "description": "Qwen2.5 7B is the latest series of Qwen large language models.
Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g., tables), and generating structured outputs, especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context support of up to 128K tokens, with generation of up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-2.5-7b-instruct", "created": 1729036800 } }, { "id": "llama-3.1-nemotron-70b-instruct", "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 1.2, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/llama-3.1-nemotron-70b-instruct", "created": 1728950400 } }, { "id": "inflection-3-pi", "name": "Inflection: Inflection 3 Pi", "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including its backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style: if you use more emojis, so will Pi!
Try experimenting with various prompts and conversation styles.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "max_output_tokens": 1024, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "inflection/inflection-3-pi", "created": 1728604800 } }, { "id": "inflection-3-productivity", "name": "Inflection: Inflection 3 Productivity", "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflection 3 Pi](/inflection/inflection-3-pi).\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "max_output_tokens": 1024, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "inflection/inflection-3-productivity", "created": 1728604800 } }, { "id": "rocinante-12b", "name": "TheDrummer: Rocinante 12B", "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.17, "currency": "USD" }, "output": { "per_million_tokens": 0.43, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "thedrummer/rocinante-12b", "created": 1727654400 } }, { "id": "llama-3.2-3b-instruct:free", "name": "Meta: Llama 3.2 3B Instruct (free)", "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use.
Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.2-3b-instruct", "created": 1727222400 } }, { "id": "llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.2-3b-instruct", "created": 1727222400 } }, { "id": "llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. 
Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 3.2 1B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 60000, "pricing": { "input": { "per_million_tokens": 0.027, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.2-1b-instruct", "created": 1727222400 } }, { "id": "llama-3.2-90b-vision-instruct", "name": "Meta: Llama 3.2 90B Vision Instruct", "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.35, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "Llama3" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "meta-llama/llama-3.2-90b-vision-instruct", "created": 1727222400 } }, { "id": "llama-3.2-11b-vision-instruct", "name": "Meta: Llama 3.2 11B Vision Instruct", "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning.
Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.049, "currency": "USD" }, "output": { "per_million_tokens": 0.049, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "Llama3" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "meta-llama/llama-3.2-11b-vision-instruct", "created": 1727222400 } }, { "id": "qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g., tables), and generating structured outputs, especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context support of up to 128K tokens, with generation of up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.26, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Qwen" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "qwen/qwen-2.5-72b-instruct", "created": 1726704000 } }, { "id": "llama-3.1-lumimaid-8b", "name": "NeverSleep: Lumimaid v0.2 8B", "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1.
Sloppy chat outputs were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.09, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "neversleep/llama-3.1-lumimaid-8b", "created": 1726358400 } }, { "id": "pixtral-12b", "name": "Mistral: Pixtral 12B", "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "mistralai/pixtral-12b", "created": 1725926400 } }, { "id": "command-r-08-2024", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "cohere/command-r-08-2024", "created": 1724976000 }, "name": "Cohere: Command R (08-2024)", "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use.
More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000 }, { "id": "command-r-plus-08-2024", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2.8, "currency": "USD" }, "output": { "per_million_tokens": 11.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text->text", "canonical_slug": "cohere/command-r-plus-08-2024", "created": 1724976000 }, "name": "Cohere: Command R+ (08-2024)", "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000 }, { "id": "l3.1-euryale-70b", "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). 
It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.65, "currency": "USD" }, "output": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "sao10k/l3.1-euryale-70b", "created": 1724803200 } }, { "id": "qwen-2.5-vl-7b-instruct", "name": "Qwen: Qwen2.5-VL 7B Instruct", "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobile devices, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "openrouter", "capabilities": [ "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "Qwen" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "qwen/qwen-2-vl-7b-instruct", "created": 1724803200 } }, { "id": "phi-3.5-mini-128k-instruct", "name": "Microsoft: Phi-3.5 Mini 128K Instruct", "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and the filtered, publicly available websites data, with a focus on high quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).\n\nThe models underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures.
When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/phi-3.5-mini-128k-instruct", "created": 1724198400 } }, { "id": "hermes-3-llama-3.1-70b", "name": "Nous: Hermes 3 70B Instruct", "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior, finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-3-llama-3.1-70b", "created": 1723939200 } }, { "id": "hermes-3-llama-3.1-405b:free", "name": "Nous: Hermes 3 405B Instruct (free)", "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-3-llama-3.1-405b", "created": 1723766400 } }, { "id": "hermes-3-llama-3.1-405b",
"name": "Nous: Hermes 3 405B Instruct", "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-3-llama-3.1-405b", "created": 1723766400 } }, { "id": "chatgpt-4o-latest", "description": "This model will point to the latest GPT-4o model used by ChatGPT.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/chatgpt-4o-latest", "created": 1723593600 }, "name": "OpenAI: ChatGPT-4o", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384 }, { "id": "l3-lunaris-8b", "name": "Sao10K: Llama 3 8B Lunaris", "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. 
It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "sao10k/l3-lunaris-8b", "created": 1723507200 } }, { "id": "gpt-4o-2024-08-06", "description": "Supports caching, with automatic halving of charges upon a cache hit.", "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "web_search_options", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o-2024-08-06", "created": 1722902400 }, "name": "OpenAI: GPT-4o (2024-08-06)", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384 }, { "id": "llama-3.1-405b", "name": "Meta: Llama 3.1 405B (base)", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 32768, "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.1-405b", "created": 1722556800 } }, { "id": "llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/).
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.03, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.1-8b-instruct", "created": 1721692800 } }, { "id": "llama-3.1-405b-instruct", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.1-405b-instruct", "created": 1721692800 }, "name": "Meta: Llama 3.1 405B Instruct", "description": "The highly anticipated 400B class of Llama 3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high-quality dialogue use cases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 130815 }, { "id": "llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high-quality dialogue use cases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/).
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.1-70b-instruct", "created": 1721692800 } }, { "id": "mistral-nemo", "name": "Mistral: Mistral Nemo", "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.04, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-nemo", "created": 1721347200 } }, { "id": "gpt-4o-mini-2024-07-18", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o-mini-2024-07-18", "created": 1721260800 }, "name": "OpenAI: GPT-4o-mini (2024-07-18)", "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo).
It maintains SOTA intelligence while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on [common chat-preference leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 16384 }, { "id": "gpt-4o-mini", "description": "The lightweight version of GPT-4o: affordable, fast, and suitable for handling simple tasks. This provider supports the official automatic caching for this model, and charges for cache hits are automatically halved.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o-mini", "created": 1721260800 }, "name": "OpenAI: GPT-4o-mini", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "output_modalities": [ "TEXT" ] }, { "id": "gemma-2-27b-it", "name": "Google: Gemma 2 27B", "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.65, "currency": "USD" }, "output": { "per_million_tokens": 0.65, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Gemini" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-2-27b-it", "created": 1720828800 } }, { "id": "gemma-2-9b-it", "name": "Google: Gemma 2 9B", "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details.
Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.09, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Gemini" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-2-9b-it", "created": 1719532800 } }, { "id": "l3-euryale-70b", "name": "Sao10k: Llama 3 Euryale 70B v2.1", "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 1.48, "currency": "USD" }, "output": { "per_million_tokens": 1.48, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "sao10k/l3-euryale-70b", "created": 1718668800 } }, { "id": "hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 2048, "pricing": { "input": { "per_million_tokens": 0.025, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nousresearch/hermes-2-pro-llama-3-8b", "created": 1716768000 } }, { "id": "mistral-7b-instruct:free", "name": "Mistral: Mistral 7B Instruct (free)", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct", "created": 1716768000 } }, { "id": "mistral-7b-instruct", "name": "Mistral: Mistral 7B Instruct", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ],
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.028, "currency": "USD" }, "output": { "per_million_tokens": 0.054, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct", "created": 1716768000 } }, { "id": "mistral-7b-instruct-v0.3", "name": "Mistral: Mistral 7B Instruct v0.3", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct-v0.3", "created": 1716768000 } }, { "id": "phi-3-mini-128k-instruct", "name": "Microsoft: Phi-3 Mini 128K Instruct", "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/phi-3-mini-128k-instruct", "created": 1716681600 } }, { "id": "phi-3-medium-128k-instruct", "name": "Microsoft: Phi-3 Medium 128K Instruct", "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. 
In the MMLU-Pro eval, the model even comes close to a Llama 3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/phi-3-medium-128k-instruct", "created": 1716508800 } }, { "id": "llama-guard-2-8b", "name": "Meta: LlamaGuard 2 8B", "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like its predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-guard-2-8b", "created": 1715558400 } }, { "id": "gpt-4o-2024-05-13", "context_window": 128000, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" }, "cache_read": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "web_search_options", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o-2024-05-13", "created": 1715558400 }, "name": "OpenAI: GPT-4o (2024-05-13)", "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective.
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ] }, { "id": "gpt-4o", "description": "GPT-4o (“o” stands for “omni”) is a new-generation multimodal model designed for more natural human–computer interaction. It can accept any combination of text, audio, image, and video as input, and generate multimodal outputs including text, audio, and images. With audio response latency as low as 232 milliseconds and an average of around 320 milliseconds, it approaches real human conversational speed. The model delivers strong performance in English text and code, significantly improved multilingual understanding, and outstanding capabilities in visual and audio perception, while offering faster API performance and substantially reduced cost for real-time and complex multimodal applications.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs", "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o", "created": 1715558400 }, "name": "OpenAI: GPT-4o", "owned_by": "openrouter", "output_modalities": [ "TEXT" ] }, { "id": "gpt-4o:extended", "name": "OpenAI: GPT-4o (extended)", "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 18, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "structured_outputs", "tools", "web_search_options", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4o", "created": 1715558400 } }, { "id": "llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors.
This 70B instruct-tuned version was optimized for high-quality dialogue use cases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3-70b-instruct", "created": 1713398400 } }, { "id": "llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high-quality dialogue use cases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3-8b-instruct", "created": 1713398400 } }, { "id": "mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mixtral-8x22b-instruct", "created": 1713312000 } }, { "id": "wizardlm-2-8x22b", "name": "WizardLM-2 8x22B", "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model.
It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art open-source models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.48, "currency": "USD" }, "output": { "per_million_tokens": 0.48, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "microsoft/wizardlm-2-8x22b", "created": 1713225600 } }, { "id": "gpt-4-turbo", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "openai/gpt-4-turbo", "created": 1712620800 }, "name": "OpenAI: GPT-4 Turbo", "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4096 }, { "id": "claude-3-haiku", "name": "Anthropic: Claude 3 Haiku", "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.03, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-haiku", "created": 1710288000 } }, { "id": "claude-3-opus", "name": "Anthropic: Claude 3 Opus", "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks.
It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 4096, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text+image->text", "tools", "Claude" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "anthropic/claude-3-opus", "created": 1709596800 } }, { "id": "mistral-large", "name": "Mistral Large", "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-large", "created": 1708905600 } }, { "id": "gpt-3.5-turbo-0613", "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-3.5-turbo-0613", "created": 1706140800 }, "name": "OpenAI: GPT-3.5 Turbo (older v0613)", "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4095, "max_output_tokens": 4096 }, { "id": "gpt-4-turbo-preview", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4-turbo-preview", "created": 1706140800 }, "name": "OpenAI: GPT-4 Turbo Preview", "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. 
Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4096 }, { "id": "mistral-tiny", "name": "Mistral Tiny", "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-tiny", "created": 1704844800 } }, { "id": "mistral-7b-instruct-v0.2", "name": "Mistral: Mistral 7B Instruct v0.2", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct-v0.2", "created": 1703721600 } }, { "id": "mixtral-8x7b-instruct", "name": "Mistral: Mixtral 8x7B Instruct", "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 0.54, "currency": "USD" }, "output": { "per_million_tokens": 0.54, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mixtral-8x7b-instruct", "created": 1702166400 } }, { "id": "noromaid-20b", "name": "Noromaid 20B", "description": "A collab between IkariDev and Undi.
This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1.75, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama2" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "neversleep/noromaid-20b", "created": 1700956800 } }, { "id": "goliath-120b", "name": "Goliath 120B", "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 6144, "max_output_tokens": 1024, "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama2" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "alpindale/goliath-120b", "created": 1699574400 } }, { "id": "auto", "name": "Auto Router", "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). 
Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-4.1-nano](/openai/gpt-4.1-nano)\n- [openai/gpt-4.1](/openai/gpt-4.1)\n- [openai/gpt-4.1-mini](/openai/gpt-4.1-mini)\n- [openai/gpt-4o-mini](/openai/gpt-4o-mini)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-opus-4-1](/anthropic/claude-opus-4-1)\n- [anthropic/claude-sonnet-4-0](/anthropic/claude-sonnet-4-0)\n- [anthropic/claude-3-7-sonnet-latest](/anthropic/claude-3-7-sonnet-latest)\n- [google/gemini-2.5-pro](/google/gemini-2.5-pro)\n- [google/gemini-2.5-flash](/google/gemini-2.5-flash)\n- [mistral/mistral-large-latest](/mistral/mistral-large-latest)\n- [mistral/mistral-medium-latest](/mistral/mistral-medium-latest)\n- [mistral/mistral-small-latest](/mistral/mistral-small-latest)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [perplexity/sonar](/perplexity/sonar)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "metadata": { "source": "openrouter", "tags": [ "text->text", "Router" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openrouter/auto", "created": 1699401600 } }, { "id": "gpt-4-1106-preview", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4-1106-preview", "created": 1699228800 }, "name": "OpenAI: GPT-4 Turbo (older v1106)", "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4096 }, { "id": "gpt-3.5-turbo-instruct", "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-3.5-turbo-instruct", "created": 1695859200 }, "name": "OpenAI: GPT-3.5 Turbo Instruct", "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations.
Training data: up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4095, "max_output_tokens": 4096 }, { "id": "mistral-7b-instruct-v0.1", "name": "Mistral: Mistral 7B Instruct v0.1", "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2824, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.19, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct-v0.1", "created": 1695859200 } }, { "id": "gpt-3.5-turbo-16k", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-3.5-turbo-16k", "created": 1693180800 }, "name": "OpenAI: GPT-3.5 Turbo 16k", "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16385, "max_output_tokens": 4096 }, { "id": "weaver", "name": "Mancer: Weaver (alpha)", "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "max_output_tokens": 2000, "pricing": { "input": { "per_million_tokens": 1.125, "currency": "USD" }, "output": { "per_million_tokens": 1.125, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "Llama2" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mancer/weaver", "created": 1690934400 } }, { "id": "remm-slerp-l2-13b", "name": "ReMM SLERP 13B", "description": "A recreation trial of the original MythoMax-L2-13B but with updated models. #merge", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 6144, "pricing": { "input": { "per_million_tokens": 0.45, "currency": "USD" }, "output": { "per_million_tokens": 0.65, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama2" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "undi95/remm-slerp-l2-13b", "created": 1689984000 } }, { "id": "mythomax-l2-13b", "name": "MythoMax 13B", "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay.
#merge", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->text", "structured_outputs", "Llama2" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "gryphe/mythomax-l2-13b", "created": 1688256000 } }, { "id": "gpt-4-0314", "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4-0314", "created": 1685232000 }, "name": "OpenAI: GPT-4 (older v0314)", "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8191, "max_output_tokens": 4096 }, { "id": "gpt-4", "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-4", "created": 1685232000 }, "name": "OpenAI: GPT-4", "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8191, "max_output_tokens": 4096 }, { "id": "gpt-3.5-turbo", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "tools", "GPT" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "openai/gpt-3.5-turbo", "created": 1685232000 }, "name": "OpenAI: GPT-3.5 Turbo", "description": "GPT-3.5 Turbo is OpenAI's fastest model. 
It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16385, "max_output_tokens": 4096 }, { "id": "gte-base", "name": "Thenlper: GTE-Base", "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "thenlper/gte-base-20251117", "created": 1763433820 } }, { "id": "gte-large", "name": "Thenlper: GTE-Large", "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "thenlper/gte-large-20251117", "created": 1763433655 } }, { "id": "e5-large-v2", "name": "Intfloat: E5-Large-v2", "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "intfloat/e5-large-v2-20251117", "created": 1763433432 } }, { "id": "e5-base-v2", "name": "Intfloat: E5-Base-v2", "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 
0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "intfloat/e5-base-v2-20251117", "created": 1763433192 } }, { "id": "multilingual-e5-large", "name": "Intfloat: Multilingual-E5-Large", "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "intfloat/multilingual-e5-large-20251117", "created": 1763433047 } }, { "id": "paraphrase-minilm-l6-v2", "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", "created": 1763432454 } }, { "id": "all-minilm-l12-v2", "name": "Sentence Transformers: all-MiniLM-L12-v2", "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "sentence-transformers/all-minilm-l12-v2-20251117", "created": 1763432155 } }, { "id": "bge-base-en-v1.5", "name": "BAAI: bge-base-en-v1.5", "description": "The bge-base-en-v1.5 embedding model converts English sentences and paragraphs into 768-dimensional dense vectors, delivering efficient, high-quality semantic embeddings optimized for retrieval, semantic search, and document-matching workflows. 
This version (v1.5) features improved similarity-score distribution and stronger retrieval performance out of the box.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "baai/bge-base-en-v1.5-20251117", "created": 1763431837 } }, { "id": "multi-qa-mpnet-base-dot-v1", "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", "created": 1763431339 } }, { "id": "bge-large-en-v1.5", "name": "BAAI: bge-large-en-v1.5", "description": "The bge-large-en-v1.5 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-fidelity semantic embeddings optimized for semantic search, document retrieval, and downstream NLP tasks in English.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "baai/bge-large-en-v1.5-20251117", "created": 1763431087 } }, { "id": "bge-m3", "name": "BAAI: bge-m3", "description": "The bge-m3 embedding model encodes sentences, paragraphs, and long documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for multilingual retrieval, semantic search, and large-context applications.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "baai/bge-m3-20251117", "created": 1763424372 } }, { "id": "all-mpnet-base-v2", "name": "Sentence Transformers: all-mpnet-base-v2", "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity 
scoring, and text ranking.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "sentence-transformers/all-mpnet-base-v2-20251117", "created": 1763421830 } }, { "id": "all-minilm-l6-v2", "name": "Sentence Transformers: all-MiniLM-L6-v2", "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "pricing": { "input": { "per_million_tokens": 0.005, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "sentence-transformers/all-minilm-l6-v2-20251117", "created": 1763421176 } }, { "id": "mistral-embed-2312", "name": "Mistral: Mistral Embed 2312", "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings", "structured_outputs", "Mistral" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "mistralai/mistral-embed-2312", "created": 1761944622 } }, { "id": "gemini-embedding-001", "description": "Latest version", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "google/gemini-embedding-001", "created": 1761943410 }, "name": "Google: Gemini Embedding 001", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 20000 }, { "id": "text-embedding-ada-002", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "openai/text-embedding-ada-002", "created": 1761865798 }, "name": "OpenAI: Text Embedding Ada 002", "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", "owned_by":
"openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 8192 }, { "id": "codestral-embed-2505", "name": "Mistral: Codestral Embed 2505", "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "openrouter", "tags": [ "text->embeddings", "structured_outputs", "Mistral" ], "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "mistralai/codestral-embed-2505", "created": 1761864460 } }, { "id": "text-embedding-3-large", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.13, "currency": "USD" }, "output": { "per_million_tokens": 0.13, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "openai/text-embedding-3-large", "created": 1761862866 }, "name": "OpenAI: Text Embedding 3 Large", "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 8192 }, { "id": "text-embedding-3-small", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "openai/text-embedding-3-small", "created": 1761857455 }, "name": "OpenAI: Text Embedding 3 Small", "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 8192 }, { "id": "qwen3-embedding-8b", "description": "The Qwen3 Embedding model series is the latest proprietary model family from Qwen, specifically designed for text embedding and ranking tasks. Based on the dense base models of the Qwen3 series, it offers comprehensive text embedding and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the excellent multilingual capabilities, long-text understanding, and reasoning skills of its base models. 
The Qwen3 Embedding series demonstrates significant advancements in various text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bilingual text mining.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "qwen/qwen3-embedding-8b", "created": 1761680622 }, "name": "Qwen: Qwen3 Embedding 8B", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "qwen3-embedding-4b", "description": "The Qwen3 Embedding model series is the latest proprietary model family from Qwen, specifically designed for text embedding and ranking tasks. Based on the dense base models of the Qwen3 series, it offers comprehensive text embedding and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the excellent multilingual capabilities, long-text understanding, and reasoning skills of its base models. The Qwen3 Embedding series demonstrates significant advancements in various text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bilingual text mining.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding", "category": "language-model", "original_architecture": "text->embeddings", "canonical_slug": "qwen/qwen3-embedding-4b", "created": 1761662922 }, "name": "Qwen: Qwen3 Embedding 4B", "owned_by": "openrouter", "capabilities": [ "EMBEDDING" ], "output_modalities": [ "TEXT" ], "context_window": 32768 }, { "id": "stepfun-ai/step3", "description": "Step3 is a multimodal reasoning model released by StepFun. It uses a Mixture‑of‑Experts (MoE) architecture with 321 billion total parameters and 38 billion activation parameters. The model follows an end‑to‑end design that reduces decoding cost while delivering top‑tier performance on vision‑language reasoning tasks. Thanks to the combined use of Multi‑Head Factorized Attention (MFA) and Attention‑FFN Decoupling (AFD), Step3 remains highly efficient on both flagship and low‑end accelerators. During pre‑training, it processed over 20 trillion text tokens and 4 trillion image‑text mixed tokens, covering more than ten languages. 
On benchmarks for mathematics, code, and multimodal tasks, Step3 consistently outperforms other open‑source models.", "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 2.75, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "include_reasoning", "reasoning", "structured_outputs", "tools" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "stepfun-ai/step3", "created": 1756415375 }, "name": "StepFun: Step3", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "REASONING", "IMAGE_RECOGNITION" ], "input_modalities": [ "VISION", "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "max_output_tokens": 65536 }, { "id": "google/gemma-3-27b-it", "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone. This model is ready for commercial use.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text+image->text", "structured_outputs", "tools", "Gemini" ], "category": "language-model", "original_architecture": "text+image->text", "canonical_slug": "google/gemma-3-27b-it", "created": 1741756359 }, "name": "Google: Gemma 3 27B", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_RECOGNITION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 96000, "max_output_tokens": 96000 }, { "id": "nvidia/llama-3.1-nemotron-70b-instruct", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "tools", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "nvidia/llama-3.1-nemotron-70b-instruct", "created": 1728950400 }, "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. 
This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "max_output_tokens": 16384 }, { "id": "meta-llama/llama-3.2-3b-instruct:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "Llama3" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "meta-llama/llama-3.2-3b-instruct", "created": 1727222400 }, "name": "Meta: Llama 3.2 3B Instruct (free)", "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "openrouter", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072 }, { "id": "google/gemma-2-27b-it", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "structured_outputs", "Gemini" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "google/gemma-2-27b-it", "created": 1720828800 }, "name": "Google: Gemma 2 27B", "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. 
Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "owned_by": "openrouter", "capabilities": [ "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192 }, { "id": "mistralai/mistral-7b-instruct:free", "pricing": { "input": { "per_million_tokens": 0.002, "currency": "USD" }, "output": { "per_million_tokens": 0.002, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "text->text", "tools", "Mistral" ], "category": "language-model", "original_architecture": "text->text", "canonical_slug": "mistralai/mistral-7b-instruct", "created": 1716768000 }, "name": "Mistral: Mistral 7B Instruct (free)", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "owned_by": "openrouter", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "max_output_tokens": 16384 }, { "id": "claude-opus-4-5", "description": "Claude Opus 4.5 is Anthropic’s latest frontier reasoning model, optimized for complex engineering, agentic workflows, and long-horizon computer use. It features strong multimodal capabilities, improved resistance to prompt injection, and a new Verbosity parameter to control token efficiency. With advanced tool use, extended context, and multi-agent support, Opus 4.5 excels in autonomous research, debugging, planning, and spreadsheet/browser operations.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT", "CODE_EXECUTION", "WEB_SEARCH", "FILE_INPUT", "FILE_SEARCH", "COMPUTER_USE" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 25, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_write": { "per_million_tokens": 6.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "gemini-3-pro-preview-search", "description": "Gemini-3-pro-search integrates Google's official search functionality. Search usage incurs an additional, separately logged fee that is deducted directly from billing; the fee-log details are not yet displayed and will be shown in a future update. The model only supports OpenAI-compatible format calls and does not support the Gemini SDK; with the native Gemini SDK, set the official search parameters directly.", "capabilities": [ "REASONING", "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "web", "deepsearch", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "thinking,web,deepsearch,tools,function_calling,structured_outputs,long_context" } }, {
"id": "gpt-5.1-chat-latest", "description": "GPT-5.1 Chat refers to the GPT-5.1 snapshot currently used in ChatGPT and is optimized for conversational use cases. While GPT-5.1 is recommended for most API applications, GPT-5.1 Chat is ideal for testing the latest improvements in chat-based interactions.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 128000, "max_output_tokens": 16384, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "function_calling,structured_outputs" } }, { "id": "mistral-large-3", "description": "Mistral Large 3 is a MoE model with 67.5B total parameters and 41B active parameters, supporting a 256K-token context window. Trained from scratch on 3,000 NVIDIA H200 GPUs, it is one of the strongest permissively licensed open-weight models available.\n\nDesigned for advanced reasoning and long-context understanding, Mistral Large 3 delivers performance on par with the best instruction-tuned open-weight models for general-purpose tasks, while also offering image understanding capabilities. Its multilingual strengths are particularly notable for non-English/Chinese languages, making it well-suited for global applications.\n\nTypical use cases include enterprise assistants, multilingual customer support, content generation and editing, data analysis over long documents, code assistance, and research workflows that require handling large corpora or complex instructions. With its MoE architecture, Mistral Large 3 balances strong performance with efficient inference, providing a versatile backbone for building reliable, production-grade AI systems.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "function_calling,structured_outputs" } }, { "id": "claude-haiku-4-5", "description": "Claude Haiku 4.5 is a fast, affordable, and highly capable AI model, excelling at coding and agentic tasks. Its combination of speed and low cost makes it ideal for powering real-time applications like chatbots, high-volume free services, and specialized \"sub-agents\" for complex tasks in coding, finance, and research. 
It can also handle common business tasks like creating office documents and assisting with strategy and analysis.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 204800, "max_output_tokens": 131072, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 5.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.11, "currency": "USD" }, "cache_write": { "per_million_tokens": 1.375, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "claude-sonnet-4-5", "description": "Sonnet 4.5 is the best model in the world for agents, coding, and computer usage. It is also our most accurate and detailed model for long-running tasks, with enhanced knowledge in coding, finance, and cybersecurity. \nThis model supports a thinking parameter to enable thinking requests in Claude mode.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1000000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.33, "currency": "USD" }, "cache_write": { "per_million_tokens": 4.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "grok-4-1-fast-non-reasoning", "description": "Grok 4.1 is a new conversational model with significant improvements in real-world usability, delivering exceptional performance in creative, emotional, and collaborative interactions. It is more perceptive to nuanced user intent, more engaging to converse with, and more coherent in personality, while fully preserving its core intelligence and reliability. Built on large-scale reinforcement learning infrastructure, the model is optimized for style, personality, helpfulness, and alignment, and leverages frontier agentic reasoning models as reward evaluators to autonomously assess and iterate on responses at scale, significantly enhancing overall interaction quality.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 2000000, "max_output_tokens": 2000000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "grok-4-1-fast-reasoning", "description": "Grok 4.1 is a new conversational model with significant improvements in real-world usability, delivering exceptional performance in creative, emotional, and collaborative interactions. It is more perceptive to nuanced user intent, more engaging to converse with, and more coherent in personality, while fully preserving its core intelligence and reliability. 
Built on large-scale reinforcement learning infrastructure, the model is optimized for style, personality, helpfulness, and alignment, and leverages frontier agentic reasoning models as reward evaluators to autonomously assess and iterate on responses at scale, significantly enhancing overall interaction quality.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 2000000, "max_output_tokens": 2000000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "deepseek-v3.2-think", "description": "DeepSeek-V3.2 is an efficient large language model equipped with DeepSeek Sparse Attention and reinforced reasoning performance, but its core strength lies in powerful agentic capabilities—enabled by large-scale task-synthesis that tightly integrates reasoning with real-world tool use, delivering robust, compliant, and generalizable agent behaviour. Users can toggle deeper reasoning through the reasoning_enabled switch.", "capabilities": [ "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.302, "currency": "USD" }, "output": { "per_million_tokens": 0.453, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0302, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "web", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "web,tools,function_calling,structured_outputs" } }, { "id": "deepseek-math-v2", "description": "This model shifts the focus of mathematical reasoning in large language models from pursuing correct answers to ensuring rigorous processes. It adopts a \"self-verification\" paradigm: specialized verifiers are trained to evaluate proof steps, and generators are then trained against them to correct their own errors, with the two co-evolving to push the boundaries of capability. The model ultimately achieves gold-medal level in top competitions such as the IMO, demonstrating the great potential of deep reasoning.", "capabilities": [ "REASONING", "WEB_SEARCH" ], "input_modalities": [ "TEXT" ], "context_window": 163000, "max_output_tokens": 163000, "pricing": { "input": { "per_million_tokens": 0.492, "currency": "USD" }, "output": { "per_million_tokens": 1.968, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0984, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "web" ], "original_types": "llm", "original_features": "thinking,web" } }, { "id": "deepseek-v3.2-exp-think", "description": "The model DeepSeek-V3.2-Exp-Think is officially named deepseek-reasoner. It is an experimental version.
As an intermediate step towards the next-generation architecture, V3.2-Exp introduces DeepSeek Sparse Attention (a sparse attention mechanism) based on V3.1-Terminus, exploring and validating optimizations for training and inference efficiency on long texts.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 131000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.411, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "deepseek-v3.1-think", "description": "Thinking mode of DeepSeek-V3.1.\nDeepSeek V3.1 is a text generation model provided by DeepSeek, featuring a hybrid reasoning architecture that achieves an effective integration of thinking and non-thinking modes.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.56, "currency": "USD" }, "output": { "per_million_tokens": 1.68, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "gpt-5-chat-latest", "description": "GPT-5 Chat points to the GPT-5 snapshot currently used in ChatGPT. GPT-5 is our next-generation, high-intelligence flagship model. It accepts both text and image inputs, and produces text outputs.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 400000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-opus-4-1", "description": "Opus 4.1 is an upgraded version of Claude Opus 4, with improvements mainly in agent tasks, practical coding, and reasoning. Compared to Opus 4, it delivers a modest gain in software-engineering accuracy, reaching 74.5%.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 16.5, "currency": "USD" }, "output": { "per_million_tokens": 82.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seedream-4-5", "description": "Seedream 4.5 is ByteDance's latest multimodal image model, integrating capabilities such as text-to-image, image-to-image, and multi-image output, along with incorporating common sense and reasoning abilities. Compared to the previous 4.0 model, it significantly improves generation quality, offering better editing consistency and multi-image fusion effects, with more precise control over image details.
The generation of small text and small faces is more natural.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "sora-2", "description": "Sora-2 is the next-generation text-to-video model evolved from Sora, optimized for higher visual realism, stronger physical consistency, and longer temporal coherence. It delivers more stable character consistency, complex motion rendering, camera control, and narrative continuity, while supporting higher resolutions and minute-level video generation for film production, advertising, virtual content creation, and creative multimedia workflows.", "capabilities": [ "VIDEO_GENERATION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "sora-2-pro", "description": "Official API for OpenAI's Sora2-pro video model.", "capabilities": [ "VIDEO_GENERATION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "gpt-4o-mini-audio-preview", "input_modalities": [ "TEXT", "AUDIO" ], "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "jimeng-3.0-1080p", "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture. The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "jimeng-3.0-720p", "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture.
The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "jimeng-3.0-pro", "description": "DreamVideo 3.0 Pro is a professional-grade text-to-video and image-to-video model built on the Dream framework, delivering a major breakthrough in video generation quality. This version demonstrates strong performance across multiple dimensions, including narrative coherence, instruction following, dynamic fluidity, and visual detail. It supports multi-shot storytelling and generates 1080P high-definition videos with a professional cinematic texture. The model also enables diverse and expressive stylistic rendering, making it well suited for creative production and visual storytelling.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "kimi-for-coding-free", "description": "kimi-for-coding-free is a free and open version offered by AIHubMix specifically for Kimi users. To maintain stable service operations, the following usage limits apply: a maximum of 10 requests per minute, 1,000 total requests per day, and a daily quota of 5 million tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "wan2.2-i2v-plus", "description": "The newly upgraded Tongyi Wanxiang 2.2 image-to-video model offers higher video quality. It optimizes video generation stability and success rate, features stronger instruction-following capabilities, consistently maintains image text, portrait, and product consistency, and provides precise camera motion control.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "wan2.2-t2v-plus", "description": "The newly upgraded Tongyi Wanxiang 2.2 text-to-video offers higher video quality.
It can stably generate large-scale complex motions, supports cinematic-level visual performance and control, and features enhanced instruction-following capabilities to achieve realistic physical world reproduction.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "wan2.5-i2v-preview", "description": "Tongyi Wanxiang 2.5 - Image-to-Video Preview features a newly upgraded technical architecture, supporting sound generation synchronized with visuals, 10-second long video generation, stronger instruction-following capabilities, and further improvements in motion ability and visual quality.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "wan2.5-t2v-preview", "description": "Tongyi Wanxiang 2.5 - Text-to-Video Preview features a newly upgraded model architecture, supporting sound generation synchronized with visuals and 10-second long video generation, with enhanced instruction compliance, improved motion capability, and further enhanced visual quality.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "web-sora-2", "description": "This model is an unofficial reverse-engineered API of the OpenAI web version sora-2-hd, for entertainment purposes only. Charges apply regardless of generation success or failure, billed per use. If this is a concern, please avoid using it. It can be used via the chat interface, allowing intuitive image uploads: you can directly upload images through the chat interface as the basis for video generation.\n\nPrecise parameter control: by appending commands such as \"landscape/portrait,\" \"16:9/9:16,\" \"10 seconds/15 seconds,\" etc., at the end of the prompt, you can directly define the video's aspect ratio and duration.", "capabilities": [ "VIDEO_GENERATION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "web-sora-2-pro", "description": "This model is an unofficial reverse-engineered API of the OpenAI web version sora-2-hd, for entertainment purposes only. Charges apply regardless of generation success or failure, billed per use. If this is a concern, please avoid using it.
It can be used via the chat interface, allowing intuitive image uploads: you can directly upload images through the chat interface as the basis for video generation.\n\nPrecise parameter control: by appending commands such as \"landscape/portrait,\" \"16:9/9:16,\" \"10 seconds/15 seconds,\" etc., at the end of the prompt, you can directly define the video's aspect ratio and duration.", "capabilities": [ "VIDEO_GENERATION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "cc-glm-4.6", "description": "For Claude Code.", "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.22, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "coding-glm-4.6", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.22, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.010998, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "coding-glm-4.6-free", "description": "coding-glm-4.6-free is the open and free version of coding-glm-4.6. To ensure stable service performance, usage limits are in place: up to 10 requests per minute, 1,000 requests per day, and a daily token allowance of 5 million.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 200000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "coding-minimax-m2", "description": "coding-minimax-m2 is a free and open version offered by AIHubMix specifically for MiniMax users. To maintain stable service operations, the following usage limits apply: a maximum of 10 requests per minute, 1,000 total requests per day, and a daily quota of 5 million tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 204800, "max_output_tokens": 13100, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "coding-minimax-m2-free", "description": "coding-minimax-m2-free is a free and open version offered by AIHubMix specifically for MiniMax users.
To maintain stable service operations, the following usage limits apply: a maximum of 10 requests per minute, 1,000 total requests per day, and a daily quota of 5 million tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 204800, "max_output_tokens": 13100, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seed-code-free", "description": "doubao-seed-code-free is a free and open version offered by AIHubMix specifically for Doubao users. To maintain stable service operations, the following usage limits apply: a maximum of 10 requests per minute, 1,000 total requests per day, and a daily quota of 5 million tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "flux-2-flex", "description": "FLUX.2 is purpose-built for real-world creative production workflows. It delivers high-quality images while maintaining character and style consistency across multiple reference images, shows exceptional understanding and execution of structured prompts, and supports complex text reading and writing. It also adheres to brand guidelines, handles lighting, layout, and logo elements with stability, and enables image editing at resolutions up to 4MP — all while preserving fine details, striking a balance between creativity and professional-grade visual output.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "flux-2-pro", "description": "FLUX.2 is purpose-built for real-world creative production workflows. It delivers high-quality images while maintaining character and style consistency across multiple reference images, shows exceptional understanding and execution of structured prompts, and supports complex text reading and writing.
It also adheres to brand guidelines, handles lighting, layout, and logo elements with stability, and enables image editing at resolutions up to 4MP — all while preserving fine details, striking a balance between creativity and professional-grade visual output.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "gemini-2.5-pro-search", "description": "gemini-2.5-pro-search integrates Google's official search functionality. Search usage incurs an additional, separately logged fee that is deducted directly from billing; the fee-log details are not yet displayed and will be shown in a future update. Only OpenAI-compatible format calls are supported; the Gemini SDK is not supported. With the native Gemini SDK, set the official search parameters directly.", "capabilities": [ "REASONING", "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "thinking", "web", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "thinking,web,tools,function_calling,structured_outputs,long_context" } }, { "id": "deepseek-v3.1-fast", "description": "The model provider is the SophNet platform. DeepSeek V3.1 Fast is the high-TPS, high-speed version of DeepSeek V3.1.\nHybrid thinking mode: By modifying the chat template, a single model can simultaneously support both thinking and non-thinking modes.\nSmarter tool usage: Through post-training optimization, the model’s performance in tool utilization and agent tasks has improved significantly.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 163000, "max_output_tokens": 163000, "pricing": { "input": { "per_million_tokens": 1.096, "currency": "USD" }, "output": { "per_million_tokens": 3.288, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "claude-sonnet-4-0", "description": "Claude Sonnet 4 is a significant upgrade to Sonnet 3.7, delivering superior performance in coding and reasoning with enhanced precision and control. Achieving a state-of-the-art 72.7% on SWE-bench, the model expertly balances advanced capability with computational efficiency.
Key improvements include more reliable codebase navigation and complex instruction following, making it ideal for a wide range of applications, from routine coding to complex software development projects.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1000000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.33, "currency": "USD" }, "cache_write": { "per_million_tokens": 4.125, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "kimi-for-coding", "description": "kimi-for-coding is a free and open version offered by AIHubMix specifically for Kimi users. To maintain stable service operations, the following usage limits apply: a maximum of 10 requests per minute, 1,000 total requests per day, and a daily quota of 5 million tokens.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 256000, "max_output_tokens": 256000, "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "gemini-2.5-flash-nothink", "description": "Gemini-2.5-flash has thinking enabled by default; to disable thinking, request the model name gemini-2.5-flash-nothink, which only supports OpenAI-compatible format calls and does not support the Gemini SDK. With the native Gemini SDK, set the parameter budget=0 directly.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1047576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context" } }, { "id": "gemini-2.5-flash-search", "description": "gemini-2.5-flash-search integrates Google's official search functionality. Search usage incurs an additional, separately logged fee that is deducted directly from billing; the fee-log details are not yet displayed and will be shown in a future update. Only OpenAI-compatible format calls are supported; the Gemini SDK is not supported. With the native Gemini SDK, set the official search parameters directly.", "capabilities": [ "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } },
"metadata": { "source": "aihubmix", "tags": [ "llm", "search", "web", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "web,tools,function_calling,structured_outputs,long_context" } }, { "id": "gemini-2.5-flash-preview-05-20-nothink", "description": "Gemini-2.5-flash-preview-05-20 is enabled by default for thinking; to disable it, request the name gemini-2.5-flash-preview-05-20-nothink.Only OpenAI-compatible format calls are supported; Gemini SDK is not supported. For the native Gemini SDK, please set the parameter budget=0 directly.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context" } }, { "id": "gemini-2.5-flash-preview-05-20-search", "description": "Gemini-2.5 Flash Preview 05-20 Search integrates Google's official search functionality; the search feature will have an additional separate fee log directly integrated into the scoring deduction, with detailed logs not displayed. It will be fixed and displayed later. Only OpenAI-compatible formats are supported for invocation; Gemini SDK is not supported. For Gemini's native SDK, please set parameters directly using the official search parameters.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.499, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "tools,function_calling,structured_outputs,long_context" } }, { "id": "deepseek-v3-fast", "description": "V3 Ultra-Fast Version,The current price is a limited-time 50% discount and will return to the original price on July 31st. The original price is: input: $0.55/M, output: $2.2/M. The model provider is the Sophnet platform. 
DeepSeek V3 Fast is a high-TPS, ultra-fast version of DeepSeek V3 0324, featuring full-precision (non-quantized) performance, enhanced code and math capabilities, and faster responses!\n\nDeepSeek V3 0324 is a powerful Mixture-of-Experts (MoE) model with a total parameter count of 671B, activating 37B parameters per token.\nIt adopts Multi-Head Latent Attention (MLA) and the DeepSeekMoE architecture to achieve efficient inference and economical training costs.\nIt innovatively implements a load balancing strategy without auxiliary loss and sets multi-token prediction training targets to enhance performance.\nThe model is pre-trained on 14.8 trillion diverse, high-quality tokens and further optimized through supervised fine-tuning and reinforcement learning stages to fully realize its capabilities.\nComprehensive evaluations show that DeepSeek V3 outperforms other open-source models and rivals leading closed-source models in performance.\nThe entire training process only requires 2.788M H800 GPU hours and remains highly stable, with no irrecoverable loss spikes or rollbacks.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 32000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.56, "currency": "USD" }, "output": { "per_million_tokens": 2.24, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "veo-2.0-generate-001", "description": "Veo 2.0 is an advanced video generation model capable of producing high-quality videos based on text or image prompts. It excels in understanding real-world physics and human motion, resulting in fluid character movements and lifelike scenes. Veo 2.0 supports various visual styles and camera control options, including lens types, angles, and motion effects. Users can generate 8-second video clips at 720p resolution.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "veo3.1", "description": "veo3.1 is a reverse-engineered model; other available model names that can be requested include veo3.1-pro and veo3.1-components. Pricing is currently tentative and calculated per token, at approximately $0.05 per request.", "capabilities": [ "VIDEO_GENERATION" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 200, "currency": "USD" }, "output": { "per_million_tokens": 200, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "imagen-4.0", "description": "Imagen 4 is a high-quality text-to-image model developed by Google, designed for strong visual fidelity, diverse artistic styles, and precise controllability. It delivers near photographic realism with sharp details and natural lighting while significantly reducing common artifacts such as distorted hands.
The model supports a wide range of styles including photorealistic, illustration, anime, oil painting, and pixel art, and offers flexible aspect ratios for use cases from content covers to mobile wallpapers. It also enables image editing and secondary creation on existing images, provides fast and stable generation, and offers strong commercial usability with high visual quality and reliable content safety.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "imagen-4.0-fast-generate-001", "description": "Imagen 4 is a new-generation image generation model designed to balance high-quality output, inference efficiency, and content safety. It supports image generation, digital watermarking with authenticity verification, user-configurable safety settings, and prompt enhancement via the Prompt Rewriter, while also delivering reliable person generation capabilities. The model ID is imagen-4.0-fast-generate-001, making it suitable for professional creation, design workflows, and various generative AI applications.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "imagen-4.0-fast-generate-preview-06-06", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "imagen-4.0-generate-001", "description": "Imagen 4 is a new-generation image generation model designed to balance high-quality output, inference efficiency, and content safety. It supports image generation, digital watermarking with authenticity verification, user-configurable safety settings, and prompt enhancement via the Prompt Rewriter, while also delivering reliable person generation capabilities. 
The model ID is imagen-4.0-generate-001, making it suitable for professional creation, design workflows, and various generative AI applications.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "imagen-4.0-ultra-generate-001", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "imagen-4.0-ultra", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "gpt-image-1", "description": "Azure OpenAI’s gpt-image-1 image generation API offers both text-to-image generation and image-to-image editing with text guidance capabilities.\nBefore using this API, please ensure you have the latest OpenAI package installed by running pip install -U openai.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" }, "cache_read": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "gpt-image-1-mini", "description": "OpenAI's gpt-image-1-mini image generation model.\nBefore use, please run pip install -U openai to upgrade to the latest openai package.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" }, "cache_read": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "grok-4-fast-non-reasoning", "description": "Grok-4-fast is a cost-effective model developed by xAI that delivers cutting-edge performance with excellent token efficiency. The model features a 2 million token context window, advanced Web and X search capabilities, and a unified architecture supporting both \"reasoning\" and \"non-reasoning\" modes. 
Compared to Grok 4, it reduces thinking tokens by an average of 40% and lowers the price by 98% while achieving the same performance.", "context_window": 2000000, "max_output_tokens": 30000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "grok-4-fast-reasoning", "description": "Grok-4-fast is a cost-effective model developed by xAI that delivers cutting-edge performance with excellent token efficiency. The model features a 2 million token context window, advanced Web and X search capabilities, and a unified architecture supporting both \"reasoning\" and \"non-reasoning\" modes. Compared to Grok 4, it reduces thinking tokens by an average of 40% and lowers the price by 98% while achieving the same performance.", "context_window": 2000000, "max_output_tokens": 30000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "kimi-k2-0711", "description": "Kimi-K2 is an MoE-architecture foundation model with extremely powerful coding and agent capabilities, featuring a total of 1 trillion parameters and activating 32 billion parameters. In benchmark tests across major categories such as general knowledge reasoning, programming, mathematics, and agents, the K2 model outperforms other mainstream open-source models.\nThe Kimi-K2 model supports a context length of 128k tokens.\nIt does not support visual input.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 131000, "max_output_tokens": 131000, "pricing": { "input": { "per_million_tokens": 0.54, "currency": "USD" }, "output": { "per_million_tokens": 2.16, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "kimi-k2-turbo-preview", "description": "The kimi-k2-turbo-preview model is a high-speed version of kimi-k2, with the same model parameters as kimi-k2, but the output speed has been increased from 10 tokens per second to 40 tokens per second.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 1.2, "currency": "USD" }, "output": { "per_million_tokens": 4.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "veo-3.0-generate-preview", "description": "Veo 3.0 Generate Preview is an advanced AI video generation model that supports text-to-video creation with synchronized audio, featuring excellent physical simulation and lip-sync capabilities. Users can generate vivid video clips from short story prompts. 
🎟️ Limited-Time Deal: Save 10% Now.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "veo-3.1-fast-generate-preview", "description": "Veo 3.1 is Google's state-of-the-art model for generating high-fidelity, 8-second 720p or 1080p videos featuring stunning realism and natively generated audio.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "veo-3.1-generate-preview", "description": "Veo 3.1 is Google's state-of-the-art model for generating high-fidelity, 8-second 720p or 1080p videos featuring stunning realism and natively generated audio.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "deepseek-ocr", "description": "DeepSeek-OCR is a vision-language model launched by DeepSeek AI, focusing on optical character recognition (OCR) and “contextual optical compression.” The model is designed to explore the limits of compressing contextual information from images, efficiently processing documents and converting them into structured text formats such as Markdown. The model requires an image as input.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "flux-kontext-max", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-router", "description": "New model-routing capability: request aihubmix-router to route each prompt to a model automatically based on question complexity, so you no longer need to switch models manually. In our tests comparing the model router against using only GPT-4.1, we observed up to 60% cost savings while maintaining similar accuracy. \nThe context length of the model router depends on the base model used for each prompt. Input size is 200,000 tokens; output size is 32,768 tokens. \nCurrently, there are four routing models: gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, and o4-mini. \nPricing: due to our current billing structure, requests through aihubmix-router are billed at the gpt-4.1-mini price regardless of which final model is used; future billing will be based on the actual model invoked. 
\nEveryone is welcome to try it out; the response will return the name of the model actually invoked.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen3-vl-plus", "description": "The Qwen3 series visual understanding model achieves an effective fusion of thinking and non-thinking modes. Its visual agent capabilities reach world-class levels on public test sets such as OSWorld. This version features comprehensive upgrades in visual coding, spatial perception, and multimodal reasoning; visual perception and recognition abilities are greatly enhanced, supporting ultra-long video understanding.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 256000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.137, "currency": "USD" }, "output": { "per_million_tokens": 1.37, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "gemini-2.5-pro-preview-03-25", "description": "Supports high concurrency. \nThe Gemini 2.5 Pro preview version is here, with higher limits for production testing. \nGoogle's latest and most powerful model.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs,long_context" } }, { "id": "gemini-2.5-pro-preview-05-06-search", "description": "Integrated with Google's official search function.", "capabilities": [ "REASONING", "WEB_SEARCH" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "thinking", "web" ], "original_types": "llm,search", "original_features": "thinking,web" } }, { "id": "gemini-2.5-pro-preview-03-25-search", "description": "Integrated with Google's official search function.", "capabilities": [ "REASONING", "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "thinking", "web", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "thinking,web,tools,function_calling,structured_outputs,long_context" } }, 
{ "id": "qwen3-max-preview", "description": "Qwen3-Max-Preview is the latest preview model in the Qwen3 series. This version is functionally equivalent to Qwen3-Max-Thinking — simply set extra_body={\"enable_thinking\": True} to enable the thinking mode. Compared to the Qwen2.5 series, it delivers significant improvements in overall general capabilities, including English–Chinese text understanding, complex instruction following, open-ended reasoning, multilingual processing, and tool-use proficiency. The model also exhibits fewer hallucinations and stronger overall reliability.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.822, "currency": "USD" }, "output": { "per_million_tokens": 3.288, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.822, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "qwen3-235b-a22b-instruct-2507", "description": "Qwen3-235B-A22B-Instruct-2507", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 262144, "max_output_tokens": 262144, "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 1.12, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "qwen3-coder-480b-a35b-instruct", "description": "The code generation model based on Qwen3 has powerful Coding Agent capabilities, achieving state-of-the-art performance compared to open-source models.The model adopts tiered pricing.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 262000, "max_output_tokens": 262000, "pricing": { "input": { "per_million_tokens": 0.82, "currency": "USD" }, "output": { "per_million_tokens": 3.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.82, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "qwen3-coder-plus-2025-07-22", "description": "The code generation model based on Qwen3 has powerful Coding Agent capabilities, excels in tool invocation and environment interaction, and can achieve autonomous programming with outstanding coding abilities while also possessing general capabilities.The model adopts tiered pricing.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 0.54, "currency": "USD" }, "output": { "per_million_tokens": 2.16, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.54, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "deepseek-v3", "description": "It has been automatically upgraded to the latest released version, 250324.\nAutomatically upgraded to the latest released version 250324.", "capabilities": [ 
"FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 1638000, "max_output_tokens": 1638000, "pricing": { "input": { "per_million_tokens": 0.272, "currency": "USD" }, "output": { "per_million_tokens": 1.088, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "gemini-2.5-pro-preview-06-05-search", "description": "Integrated with Google's official search function.", "capabilities": [ "REASONING", "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "thinking", "web", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "thinking,web,tools,function_calling,structured_outputs,long_context" } }, { "id": "imagen-4.0-ultra-generate-exp-05-20", "description": "Image 4.0 Beta version, for testing purposes only. For production environment, it is recommended to use imagen-4.0-generate-preview-05-20.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "ernie-5.0-thinking-preview", "description": "The new generation Wenxin model, Wenxin 5.0, is a native full-modal large model that adopts native full-modal unified modeling technology, jointly modeling text, images, audio, and video, possessing comprehensive full-modal capabilities. Wenxin 5.0's basic abilities are comprehensively upgraded, performing excellently on benchmark test sets, especially in multimodal understanding, instruction compliance, creative writing, factual accuracy, intelligent agent planning, and tool application.", "capabilities": [ "REASONING", "STRUCTURED_OUTPUT", "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "context_window": 183000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.822, "currency": "USD" }, "output": { "per_million_tokens": 3.288, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "structured_outputs", "function_calling" ], "original_types": "llm", "original_features": "thinking,structured_outputs,function_calling" } }, { "id": "inclusionai/ling-1t", "description": "Ling-1T is the first flagship non-thinking model in the “Ling 2.0” series, featuring 1 trillion total parameters and approximately 50 billion active parameters per token. Built on the Ling 2.0 architecture, Ling-1T is designed to push the limits of efficient inference and scalable cognition. Ling-1T-base was pretrained on over 20 trillion high-quality, reasoning-intensive tokens, supports up to a 128K context length, and incorporates an Evolutionary Chain of Thought (Evo-CoT) process during mid-stage and post-stage training. 
This training regimen greatly enhances the model’s efficiency and depth of reasoning, enabling Ling-1T to achieve top performance across multiple complex reasoning benchmarks, balancing accuracy and efficiency.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "inclusionai/ring-1t", "description": "Ring-1T is an open-source thinking model with a trillion parameters released by the Bailing team. It is built on the Ling 2.0 architecture and trained from the Ling-1T-base foundation model, with a total parameter count of 1 trillion, an active parameter count of 50 billion, and support for up to a 128K context window. The model is trained via large-scale verifiable-reward reinforcement learning (RLVR), combined with the self-developed Icepop reinforcement learning stabilization method and the efficient ASystem reinforcement learning system, significantly improving the model’s deep reasoning and natural language reasoning capabilities. Ring-1T achieves leading performance among open-source models on high-difficulty reasoning benchmarks such as mathematics competitions (e.g., IMO 2025), code generation (e.g., ICPC World Finals 2025), and logical reasoning.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "glm-4.5-x", "description": "GLM-4.5-X is the high-speed version of GLM-4.5, offering powerful performance with a generation speed of up to 100 tokens per second.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2.2, "currency": "USD" }, "output": { "per_million_tokens": 8.91, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.44, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gme-qwen2-vl-2b-instruct", "description": "The GME-Qwen2VL series is a unified multimodal embedding model family trained on the Qwen2-VL multimodal large language model (MLLM). The GME models support three types of inputs: text, images, and image-text pairs. All these input types can generate universal vector representations and exhibit excellent retrieval performance.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.138, "currency": "USD" }, "output": { "per_million_tokens": 0.138, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "gte-rerank-v2", "description": "gte-rerank-v2 is a multilingual unified text ranking model developed by Tongyi Lab, covering multiple major languages worldwide and providing high-quality text ranking services. It is typically used in scenarios such as semantic retrieval and RAG, and can simply and effectively improve text retrieval performance. 
Given a query and a set of candidate texts (documents), the model ranks the candidates from highest to lowest based on their semantic relevance to the query.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "inclusionai/ling-flash-2.0", "description": "Ling-flash-2.0 is a language model from inclusionAI with a total of 100 billion parameters, of which 6.1 billion are activated per token (4.8 billion non-embedding). As part of the Ling 2.0 architecture series, it is designed as a lightweight yet powerful Mixture-of-Experts (MoE) model. It aims to deliver performance comparable to or even exceeding that of 40B-level dense models and other larger MoE models, but with a significantly smaller active parameter count. The model represents a strategy focused on achieving high performance and efficiency through extreme architectural design and training methods.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.544, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "inclusionai/ling-mini-2.0", "description": "Ling-mini-2.0 is a small-sized, high-performance large language model based on the MoE architecture. It has a total of 16 billion parameters, but only activates 1.4 billion parameters per token (non-embedding 789 million), achieving extremely high generation speed. Thanks to the efficient MoE design and large-scale high-quality training data, despite activating only 1.4 billion parameters, Ling-mini-2.0 still demonstrates top-tier performance on downstream tasks comparable to dense LLMs under 10 billion parameters and even larger-scale MoE models.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.272, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "inclusionai/ring-flash-2.0", "description": "Ring-flash-2.0 is a high-performance thinking model deeply optimized based on the Ling-flash-2.0-base. It uses a mixture-of-experts (MoE) architecture with a total of 100 billion parameters, but only activates 6.1 billion parameters per inference. The model employs the original Icepop algorithm to solve the instability issues of large MoE models during reinforcement learning (RL) training, enabling its complex reasoning capabilities to continuously improve over long training cycles. Ring-flash-2.0 has achieved significant breakthroughs on multiple high-difficulty benchmarks, including mathematics competitions, code generation, and logical reasoning. Its performance not only surpasses top dense models under 40 billion parameters but also rivals larger open-source MoE models and closed-source high-performance thinking models. 
Although the model focuses on complex reasoning, it also performs exceptionally well on creative writing tasks. Furthermore, thanks to its efficient architecture, Ring-flash-2.0 delivers high performance with low-latency inference, significantly reducing deployment costs in high-concurrency scenarios.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.544, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "irag-1.0", "description": "Baidu's self-developed ERNIE iRAG (ERNIE image-based RAG), a retrieval-augmented text-to-image technology, combines Baidu Search's hundreds of millions of image resources with powerful foundational model capabilities to generate various ultra-realistic images. The overall effect far surpasses native text-to-image systems, eliminating the typical AI feel while maintaining low costs. ERNIE iRAG features no hallucinations, ultra-realism, and instant usability.", "capabilities": [ "IMAGE_GENERATION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "jina-deepsearch-v1", "description": "DeepSearch combines search, reading, and reasoning capabilities to pursue the best possible answer. It's fully compatible with OpenAI's Chat API format; just replace api.openai.com with aihubmix.com to get started. \nThe stream will return the thinking process.", "capabilities": [ "REASONING", "WEB_SEARCH" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "thinking", "web", "deepsearch" ], "original_types": "llm,search", "original_features": "thinking,web,deepsearch" } }, { "id": "jina-embeddings-v4", "description": "A general-purpose vector model with 3.8 billion parameters, used for multimodal and multilingual retrieval, supporting both single-vector and multi-vector embedding outputs.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "jina-reranker-v3", "description": "Multimodal multilingual document reranker, 131K context, 0.6B parameters, for visual document sorting.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 131000, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "qwen-image", "description": "Qwen-Image is a foundational image generation model in the Qwen series, achieving significant progress in complex text rendering and precise image editing. 
Experiments show that the model has strong general capabilities in image generation and editing, especially excelling in Chinese text rendering.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "qwen-image-edit", "description": "Qwen-Image-Edit is the image editing version of Qwen-Image. Based on the 20B Qwen-Image model, Qwen-Image-Edit successfully extends Qwen-Image's unique text rendering capabilities to image editing tasks, achieving precise text editing. Additionally, Qwen-Image-Edit can input the same image into Qwen2.5-VL (for visual semantic control) and the VAE encoder (for visual appearance control), enabling both semantic and appearance editing functionalities.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "qwen-image-plus", "description": "Qwen-Image is a foundational image generation model in the Qwen series, achieving significant progress in complex text rendering and precise image editing. Experiments show that the model has strong general capabilities in image generation and editing, especially excelling in Chinese text rendering.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "qwen-mt-plus", "description": "Based on the comprehensive upgrade of Qwen3, this flagship translation large model supports bidirectional translation across 92 languages. It offers fully enhanced model performance and translation quality, along with more stable terminology customization, format fidelity, and domain-prompting capabilities, making translations more accurate and natural.", "input_modalities": [ "TEXT" ], "context_window": 16000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.492, "currency": "USD" }, "output": { "per_million_tokens": 1.476, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen-mt-turbo", "description": "Based on the comprehensive upgrade of Qwen3, this flagship translation large model supports bidirectional translation across 92 languages. 
It offers fully enhanced model performance and translation quality, along with more stable terminology customization, format fidelity, and domain-prompting capabilities, making translations more accurate and natural.", "input_modalities": [ "TEXT" ], "context_window": 16000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.192, "currency": "USD" }, "output": { "per_million_tokens": 0.534912, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen3-embedding-0.6b", "description": "The Qwen3 Embedding model series is the latest proprietary model family from Qwen, specifically designed for text embedding and ranking tasks. Based on the dense base models of the Qwen3 series, it offers comprehensive text embedding and reranking models in various sizes (0.6B, 4B, and 8B). This series inherits the excellent multilingual capabilities, long-text understanding, and reasoning skills of its base models. The Qwen3 Embedding series demonstrates significant advancements in various text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bilingual text mining.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "qwen3-reranker-0.6b", "description": "Based on the dense foundational model of the Qwen3 series, it is specifically designed for ranking tasks. It inherits the base model’s outstanding multilingual capabilities, long-text understanding, and reasoning skills, achieving significant advancements in ranking tasks.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 16000, "max_output_tokens": 8000, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "qwen3-reranker-4b", "description": "Based on the dense foundational model of the Qwen3 series, it is specifically designed for ranking tasks. It inherits the base model’s outstanding multilingual capabilities, long-text understanding, and reasoning skills, achieving significant advancements in ranking tasks.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "qwen3-reranker-8b", "description": "Based on the dense foundational model of the Qwen3 series, it is specifically designed for ranking tasks. 
It inherits the base model’s outstanding multilingual capabilities, long-text understanding, and reasoning skills, achieving significant advancements in ranking tasks.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "tao-8k", "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "bce-reranker-base", "description": "bce-reranker-base is a bilingual and cross-lingual reranker from NetEase Youdao's BCEmbedding family, designed to rerank retrieved documents by semantic relevance in retrieval and RAG scenarios.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "codex-mini-latest", "description": "Only supports v1/responses API calls: https://docs.aihubmix.com/en/api/Responses-API\ncodex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.375, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "doubao-seedream-4-0", "description": "Seedream 4.0 is a SOTA-level multimodal image creation model based on a leading architecture. It breaks the creative boundaries of traditional text-to-image models by natively supporting text, single-image, and multi-image inputs. Users can freely combine text and images to achieve various creative styles within the same model, such as multi-image fusion creation based on subject consistency, image editing, and image-set generation, making image creation more flexible and controllable.\nSeedream 4.0 supports composite editing with up to 10 images in a single input. Through deep reasoning over the prompt, it automatically selects the optimal image aspect ratio and generation quantity, enabling continuous output of up to 15 content-related images at one time. 
Additionally, the model significantly improves the accuracy and content diversity of Chinese generation, supports 4K ultra-high-definition output, and provides a one-stop solution from generation to editing for professional image creation.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "embedding-v1", "description": "Embedding-V1 is a text representation model based on Baidu's Wenxin large model technology, capable of converting text into numerical vector forms for applications such as text retrieval, information recommendation, and knowledge mining. Embedding-V1 provides an Embeddings interface that generates corresponding vector representations based on the input content. By calling this interface, you can input text into the model and obtain the corresponding vector representations for subsequent text processing and analysis.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "ernie-4.5-turbo-latest", "description": "Wenxin 4.5 Turbo also has significant improvements in hallucination reduction, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more affordable.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 135000, "max_output_tokens": 12000, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.44, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "ernie-irag-edit", "description": "Baidu's self-developed ERNIE iRAG Edit image editing model supports operations based on images such as erase (object removal), repaint (object redrawing), and variation (variant generation).", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT", "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation", "tools", "function_calling", "structured_outputs" ], "category": "image-generation", "original_types": "image_generation", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "jina-clip-v2", "description": "Multi-modal Embeddings Model, multilingual, 1024-dimensional, 865M parameters.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "jina-reranker-m0", "description": "Multimodal multilingual 
document reranker, 10K context, 2.4B parameters, for visual document sorting.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "jina-colbert-v2", "description": "Multi-language ColBERT embeddings model, 560M parameters, used for embedding and reranking.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding", "rerank" ], "original_types": "embedding,rerank" } }, { "id": "jina-embeddings-v3", "description": "Text Embeddings Model, multilingual, 1024-dimensional, 570M parameters.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "flux.1-kontext-pro", "description": "Generate and edit images through both text and image prompts. Flux.1 Kontext is a multimodal flow matching model that enables both text-to-image generation and in-context image editing. Modify images while maintaining character consistency and performing local edits up to 8x faster than other leading models.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 40, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "claude-3-7-sonnet", "description": "Supports the thinking parameter through the native Claude SDK.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 128000, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "ernie-4.5", "description": "Wenxin Large Model 4.5 is a next-generation native multimodal foundational model independently developed by Baidu. 
It achieves collaborative optimization through joint modeling of multiple modalities, demonstrating excellent multimodal understanding capabilities; it possesses more advanced language abilities, with comprehensive improvements in comprehension, generation, logic, and memory, as well as significant enhancements in hallucination reduction, logical reasoning, and coding capabilities. ERNIE-4.5-21B-A3B is an aligned open-source model with an MoE structure, with a total of 21 billion parameters and 3 billion activated parameters.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 160000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.272, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "ernie-4.5-turbo-vl", "description": "The new version of the Wenxin Yiyan large model significantly improves capabilities in image understanding, creation, translation, and coding. It supports a context length of up to 32K tokens for the first time, with a notable reduction in the latency of the first token.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 139000, "max_output_tokens": 16000, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "gemini-2.0-flash", "description": "Gemini 2.0 Flash is Google's latest lightweight model featuring extremely low hallucination rates while maintaining fast response times, offering developers high-precision and efficient AI solutions particularly suited for applications requiring high factual accuracy.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs,long_context" } }, { "id": "gemini-2.0-flash-preview-image-generation", "description": "Gemini 2.0 Flash EXP is the official preview version of the image generation model. 
Compared to Imagen 3.0, Gemini’s image generation is better suited to scenarios that require contextual understanding and reasoning than to those that pursue ultimate artistic performance and visual quality.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "image_generation" ], "category": "image-generation", "original_types": "llm,image_generation" } }, { "id": "flux-1.1-pro", "description": "FLUX-1.1-pro is an AI image generation tool for professional creators and content workflows. It understands complex semantic and structural instructions to deliver high consistency, multi-image coherence, and style customization from text prompts.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 40, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "doubao-seed-1-6", "description": "Doubao-Seed-1.6 is a brand new multimodal deep reasoning model that supports four types of reasoning effort: minimal, low, medium, and high. It offers stronger model performance, serving complex tasks and challenging scenarios. It supports a 256k context window, with output length up to a maximum of 32k tokens.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 256000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 1.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.036, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seed-1-6-flash", "description": "Doubao-Seed-1.6-flash is an extremely fast multimodal deep thinking model, with TPOT requiring only 10ms. It supports both text and visual understanding, with its text comprehension surpassing the previous-generation lite model and its visual understanding on par with competitors' pro-series models. 
It supports a 256k context window and an output length of up to 16k tokens.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 256000, "max_output_tokens": 33000, "pricing": { "input": { "per_million_tokens": 0.044, "currency": "USD" }, "output": { "per_million_tokens": 0.44, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0088, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seed-1-6-lite", "description": "Doubao-Seed-1.6-lite is a brand new multimodal deep reasoning model that supports adjustable reasoning effort, with four modes: Minimal, Low, Medium, and High. It offers better cost performance, making it the best choice for common tasks, with a context window of up to 256k.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 256000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.082, "currency": "USD" }, "output": { "per_million_tokens": 0.656, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0164, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seed-1-6-thinking", "description": "The Doubao-Seed-1.6-thinking model has significantly enhanced reasoning capabilities. Compared with Doubao-1.5-thinking-pro, it has further improvements in fundamental abilities such as coding, mathematics, and logical reasoning, and now also supports visual understanding. It supports a 256k context window, with output length supporting up to 16k tokens.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "context_window": 256000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 1.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.036, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "qwen-3-235b-a22b-thinking-2507", "description": "Provided by Cerebras.", "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 2.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-r1-distill-qwen-7b", "description": "The model provider is the Sophnet platform. DeepSeek-R1-Distill-Qwen-7B is a distilled model based on the Qwen architecture, optimized for high reasoning speed and low cost. 
It achieves approximately 70% of the performance of the original model at the 7B scale, while reducing response latency by 40%, making it suitable for real-time interactive scenarios.\nThe API call cost is only one-quarter of the original Qwen-7B.\nIt supports streaming output, making it suitable for applications like chatbots.\nIt achieves an accuracy of over 65% on the GSM8K math task.", "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2-vl-72b-instruct", "description": "The model provider is the Sophnet platform. Qwen2-VL-72B-Instruct is the latest iteration in the Qwen2-VL series launched by Alibaba Cloud, representing nearly a year of innovative achievements. This model has 72 billion parameters and can understand images of various resolutions and aspect ratios. Additionally, it supports video understanding of over 20 minutes, enabling high-quality video question answering, dialogue, and content creation, along with complex reasoning and decision-making capabilities.\n\n- State-of-the-art image understanding: capable of processing images of various resolutions and aspect ratios, performing excellently across multiple visual understanding benchmarks.\n- Long video understanding: supports video comprehension exceeding 20 minutes, enabling high-quality video Q&A, dialogues, and content creation.\n- Agent operation capability: equipped with complex reasoning and decision-making abilities, it can integrate with devices such as phones and robots to perform automated operations based on visual environments and textual instructions.\n- Multilingual support: in addition to English and Chinese, it supports understanding text in images in multiple languages, including most European languages, Japanese, Korean, Arabic, Vietnamese, and more.\n- Supports a maximum context length of 128K tokens, offering powerful processing capabilities.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2.18, "currency": "USD" }, "output": { "per_million_tokens": 6.54, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2-vl-7b-instruct", "description": "The model provider is the Sophnet platform. Qwen2-VL-7B-Instruct is the latest vision-language model launched by Alibaba Cloud and the newest member of the Qwen family. This model is proficient not only in recognizing common objects but also in analyzing text, charts, icons, and layouts within images. As a visual agent, it can reason and dynamically guide tool usage, supporting operations on computers and mobile phones. 
Additionally, it can understand long videos exceeding one hour and capture key events, accurately locate objects in images, and generate structured outputs for data such as invoices and tables, making it suitable for various scenarios including finance and business.\n\n- Vision understanding capability: not only recognizes common objects but also analyzes text, charts, icons, and layouts within images.\n- Agent capability: functions as a visual agent capable of reasoning and dynamically guiding tool usage, supporting operations on computers and mobile phones.\n- Long video understanding: can comprehend video content over one hour in length and accurately localize relevant video segments.\n- Visual localization: precisely locates objects within images by generating bounding boxes or points, providing stable JSON coordinate outputs.\n- Structured output: supports structured data output for invoices, tables, and other data, suitable for finance, business, and various other scenarios.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 0.7, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cc-kimi-for-coding", "description": "for claude code", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen3-30b-a3b", "description": "Provided by chutes.ai", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen3-32b", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen3-14b", "description": "Provided by chutes.ai", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen3-8b", "description": "Provided by chutes.ai", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-pro-exp-02-05-search", "description": "Integrated with Google's official search and internet connectivity features.", "capabilities": [ "WEB_SEARCH" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "web" ], "original_features": "web" } }, { "id": "gemini-2.5-pro-preview-06-05", "description": "Google’s latest multimodal flagship model, combining exceptional coding and reasoning capabilities. Its massive 1 million token context window (soon to expand to 2 million) places it at the top of the WebDevArena and LMArena leaderboards. It is particularly well-suited for developing aesthetically pleasing and highly functional interactive web applications, code transformation, and complex workflows. 
The newly introduced \"reasoning budget\" feature cleverly balances cost and performance, while optimized tool calls and response styles further enhance development efficiency, making it the ideal choice for rapid prototyping and advanced coding.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "context_window": 1048576, "max_output_tokens": 65536, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs,long_context" } }, { "id": "aihubmix-mai-ds-r1", "description": "MAI-DS-R1 is a refined version of DeepSeek-R1 by Microsoft AI, designed to improve responsiveness to previously blocked topics while enhancing safety. It integrates 110k Tulu-3 SFT samples and 350k multilingual safety-alignment examples. The model retains strong reasoning and coding abilities, surpasses R1-1776 in handling sensitive queries, and reduces harmful content leakage. Based on a transformer MoE architecture, it suits general-purpose tasks—excluding legal, medical, or autonomous systems.", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 1.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "embedding-2", "description": "A text vector model that converts input text information into vector representations so that, in conjunction with a vector database, it provides an external knowledge base for the large model, thereby improving the accuracy of the model’s reasoning.", "input_modalities": [ "TEXT" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 0.0686, "currency": "USD" }, "output": { "per_million_tokens": 0.0686, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "embedding-3", "description": "A text vector model that converts input text into vector representations to work with a vector database and provide an external knowledge base for a large model. The model supports custom vector dimensions; it is recommended to choose 256, 512, 1024, or 2048 dimensions.", "input_modalities": [ "TEXT" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 0.0686, "currency": "USD" }, "output": { "per_million_tokens": 0.0686, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "gemini-2.0-flash-search", "description": "Integrated with Google's official search and internet connectivity features.", "capabilities": [ "WEB_SEARCH" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "web" ], "original_types": "llm,search", "original_features": "web" } }, { "id": "qwen/qwen2.5-vl-72b-instruct", "description": "Qwen2.5-VL is a visual language model from the Qwen2.5 series, equipped with strong visual understanding and reasoning capabilities. 
It can recognize objects, analyze text and charts, understand key events in long videos, and accurately locate targets within images. The model supports structured output, making it suitable for data such as invoices and forms, and performs excellently in multiple benchmark tests.", "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "bytedance-seed/seed-oss-36b-instruct", "description": "Seed-OSS is a series of open-source large language models developed by ByteDance's Seed team, designed specifically for powerful long-context processing, reasoning, agents, and general capabilities. Among this series, Seed-OSS-36B-Instruct is an instruction-tuned model with 36 billion parameters that natively supports ultra-long context lengths, enabling it to process massive documents or complex codebases in a single pass. This model is specially optimized for reasoning, code generation, and agent tasks (such as tool usage), while maintaining balanced and excellent general capabilities. A notable feature of this model is the \"Thinking Budget\" functionality, which allows users to flexibly adjust the inference length as needed, thereby effectively improving inference efficiency in practical applications.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 256000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.534, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking,tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "doubao-seed-1-6-250615", "description": "Doubao-Seed-1.6 is a brand new multimodal deep reasoning model that supports four types of reasoning effort: minimal, low, medium, and high. It offers stronger model performance, serving complex tasks and challenging scenarios. It supports a 256k context window, with output length up to a maximum of 32k tokens.", "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 2.52, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.036, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-seed-1-6-flash-250615", "description": "Doubao-Seed-1.6-flash is an extremely fast multimodal deep thinking model, with TPOT requiring only 10ms. It supports both text and visual understanding, with its text comprehension skills surpassing the previous generation lite model and its visual understanding on par with competitor's pro series models. It supports a 256k context window and an output length of up to 16k tokens.", "pricing": { "input": { "per_million_tokens": 0.044, "currency": "USD" }, "output": { "per_million_tokens": 0.44, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0088, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-seed-1-6-thinking-250615", "description": "The Doubao-Seed-1.6-thinking model has significantly enhanced reasoning capabilities. 
Compared with Doubao-1.5-thinking-pro, it has further improvements in fundamental abilities such as coding, mathematics, and logical reasoning, and now also supports visual understanding. It supports a 256k context window, with an output length of up to 16k tokens.", "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 2.52, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.036, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-seed-1-6-vision-250815", "description": "Doubao-Seed-1.6-vision is a visual deep-thinking model that demonstrates stronger general multimodal understanding and reasoning capabilities in scenarios such as education, image moderation, inspection and security, and AI search Q&A. It supports a 256K context window and an output length of up to 64K tokens.", "pricing": { "input": { "per_million_tokens": 0.10959, "currency": "USD" }, "output": { "per_million_tokens": 1.0959, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.021918, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1.5-thinking-pro", "description": "Doubao-1.5 is a brand-new deep thinking model that excels in specialized fields such as mathematics, programming, scientific reasoning, and general tasks like creative writing. It achieves or approaches the top-tier industry level on multiple authoritative benchmarks including AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output.", "pricing": { "input": { "per_million_tokens": 0.62, "currency": "USD" }, "output": { "per_million_tokens": 2.48, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.62, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cc-minimax-m2", "description": "For Claude Code only", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "deepseek-ai/deepseek-prover-v2-671b", "description": "Provided by chutes.ai\nDeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from DeepSeek-Prover-V1.5. Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemma-3-1b-it", "description": "Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning.
Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops, or your own cloud infrastructure, democratizing access to state-of-the-art AI models and helping foster innovation for everyone.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4o-image-vip", "description": "First Taste of GPT-4o's Image Generation API: Perfectly mirrors the web version's raw image creation capabilities, supporting both text-to-image and image+text-to-image generation. Each creation costs as little as $0.009.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 7, "currency": "USD" }, "output": { "per_million_tokens": 7, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "gpt-4o-image", "description": "First Taste of GPT-4o's Image Generation API: Perfectly mirrors the web version's raw image creation capabilities, supporting both text-to-image and image+text-to-image generation. Each creation costs as little as $0.005.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "gpt-4o-mini-tts", "description": "OpenAI’s latest TTS model, gpt-4o-mini-tts, uses the same API endpoint (/v1/audio/speech) as tts-1. However, OpenAI introduced a new pricing method without providing billing details via API, causing discrepancies between official pricing and aihubmix’s charges—some requests may cost more, others less. Avoid using this model if precise billing accuracy is essential.", "input_modalities": [ "TEXT" ], "output_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "tts" ], "original_types": "tts" } }, { "id": "tngtech/deepseek-r1t-chimera", "description": "Provided by chutes.ai\nDeepSeek-R1T-Chimera merges DeepSeek-R1’s reasoning strengths with DeepSeek-V3 (0324)’s token-efficiency improvements into a MoE Transformer optimized for general text generation.
It integrates pretrained weights from both models and is released under the MIT license for research and commercial use.\n", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-exp", "description": "https://doc.aihubmix.com/en/api/Gemini%20%E5%9B%BE%E7%89%87%E7%94%9F%E6%88%90%E5%92%8C%E7%BC%96%E8%BE%91\nInstructions:\n\nTo enable the new features, add the parameter \"modalities\":[\"text\",\"image\"]\nImages are passed in and returned as Base64-encoded data\nAs an experimental model, it's recommended to explicitly request \"output image\", otherwise it might only output text\nThe default height for output images is 1024px\nPython calls require the latest OpenAI SDK; run pip install -U openai first", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "image_generation", "long_context" ], "category": "image-generation", "original_types": "llm,image_generation", "original_features": "long_context" } }, { "id": "claude-3-5-sonnet", "description": "Claude 3.5 Sonnet delivers performance superior to Opus and faster speeds than its predecessor, all at the same price point. Its core strengths include:\n\nCoding: Autonomously writes, edits, and executes code with advanced reasoning and troubleshooting.\nData Science: Augments human expertise by analyzing unstructured data and using multiple tools to generate insights.\nVisual Processing: Excels at interpreting charts, graphs, and images, accurately transcribing text to derive high-level insights.\nAgentic Tasks: Exceptional tool use makes it highly effective for complex, multi-step agentic workflows that interact with other systems.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "o1-preview", "description": "The latest and most powerful reasoning model from OpenAI; AiHubMix uses both OpenAI and Microsoft Azure OpenAI channels simultaneously to achieve high-concurrency load balancing.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" }, "cache_read": { "per_million_tokens": 7.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking" } }, { "id": "o1-mini", "description": "o1-mini is faster and 80% cheaper, and is competitive with o1-preview on coding tasks.
AiHubMix uses both OpenAI and Microsoft Azure OpenAI channels simultaneously.", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-thinking-exp-01-21", "description": "The latest version, Gemini 2.0 Flash Thinking mode, is an experimental model designed to generate the \"thought process\" that the model goes through during its responses. Therefore, Gemini 2.0 Flash Thinking mode has stronger reasoning capabilities in its responses compared to the base Gemini 2.0 Flash model.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.076, "currency": "USD" }, "output": { "per_million_tokens": 0.304, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "long_context" ], "original_types": "llm", "original_features": "thinking,long_context" } }, { "id": "aihubmix-mistral-medium", "description": "Mistral Medium 3 is a SOTA & versatile model designed for a wide range of tasks, including programming, mathematical reasoning, understanding long documents, summarization, and dialogue.\n\nIt boasts multi-modal capabilities, enabling it to process visual inputs, and supports dozens of languages, including over 80 coding languages. Additionally, it features function calling and agentic workflows.\n\nMistral Medium 3 is optimized for single-node inference, particularly for long-context applications. Its size allows it to achieve high throughput on a single node.", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-pro-exp-02-05", "description": "The latest experimental version of Gemini-2.0-Pro", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "image_generation" ], "category": "image-generation", "original_types": "llm,image_generation" } }, { "id": "ernie-x1.1-preview", "description": "The Wenxin large model X1.1 has made significant improvements in question answering, tool invocation, intelligent agents, instruction following, logical reasoning, mathematics, and coding tasks, with notable enhancements in factual accuracy. 
The context length has been extended to 64K tokens, supporting longer inputs and dialogue history, which improves the coherence of long-chain reasoning while maintaining response speed.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 119000, "max_output_tokens": 64000, "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.544, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "qwen/qwq-32b", "description": "Provided by SiliconFlow.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.56, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "chutesai/mistral-small-3.1-24b-instruct-2503", "description": "Mistral's latest open-source small model; provided by chutes.ai.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "minimaxai/minimax-m1-80k", "description": "MiniMax-M1 is an open-source large-scale hybrid attention model with 456B total parameters (45.9B activated per token). It natively supports 1M-token context and reduces FLOPs by 75% versus DeepSeek R1 in 100K-token generation tasks via lightning attention. Built on a MoE architecture and optimized with the CISPO algorithm, it achieves state-of-the-art performance in long-context reasoning and real-world software engineering scenarios.", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2.5-vl-32b-instruct", "description": "Qwen2.5-VL-32B-Instruct is an advanced multimodal model from the Tongyi Qianwen team that can recognize objects, analyze text and graphics in images, operate tools, locate objects in images, and generate structured outputs.
Through reinforcement learning, it has improved mathematics and problem-solving capabilities, with a more concise and natural response style.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.24, "currency": "USD" }, "output": { "per_million_tokens": 0.24, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "baidu/ernie-4.5-300b-a47b", "description": "ERNIE-4.5-300B-A47B is a large language model developed by Baidu based on a Mixture of Experts (MoE) architecture. The model has a total of 300 billion parameters, but only activates 47 billion parameters per token during inference, which balances strong performance with computational efficiency. As one of the core models in the ERNIE 4.5 series, it demonstrates outstanding capabilities in tasks such as text understanding, generation, reasoning, and programming. The model employs an innovative multimodal heterogeneous MoE pretraining approach, leveraging joint training of textual and visual modalities to effectively enhance the model’s overall abilities, particularly excelling in instruction following and world knowledge memorization. Baidu has open-sourced this model along with other models in the series, aiming to promote the research and application of AI technology.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.32, "currency": "USD" }, "output": { "per_million_tokens": 1.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "bge-large-en", "description": "bge-large-en, open-sourced by the Beijing Academy of Artificial Intelligence (BAAI), is currently the most powerful vector representation model for English tasks, with its semantic representation capabilities comprehensively surpassing those of similar open-source models.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding", "tools", "function_calling", "structured_outputs" ], "original_types": "embedding", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "bge-large-zh", "description": "bge-large-zh, open-sourced by the Beijing Academy of Artificial Intelligence (BAAI), is currently the most powerful vector representation model for Chinese tasks, with its semantic representation capabilities comprehensively surpassing those of similar open-source models.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.068, "currency": "USD" }, "output": { "per_million_tokens": 0.068, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding", "tools", "function_calling", "structured_outputs" ], "original_types": "embedding", "original_features": "tools,function_calling,structured_outputs"
} }, { "id": "claude-opus-4-0", "description": "Alias of claude-opus-4-20250514", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 32000, "pricing": { "input": { "per_million_tokens": 16.5, "currency": "USD" }, "output": { "per_million_tokens": 82.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "codestral-latest", "description": "Mistral has launched a new code model, Codestral 25.01; https://mistral.ai/news/codestral-2501/", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ernie-4.5-0.3b", "description": "Wenxin Large Model 4.5 is a next-generation native multimodal foundational large model independently developed by Baidu. It achieves collaborative optimization through joint modeling of multiple modalities, demonstrating excellent multimodal understanding capabilities. The model possesses enhanced language abilities, with comprehensive improvements in understanding, generation, reasoning, and memory. It significantly reduces hallucinations and shows notable advancements in logical reasoning and coding skills.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.0136, "currency": "USD" }, "output": { "per_million_tokens": 0.0544, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "ernie-4.5-turbo-128k-preview", "description": "Wenxin 4.5 Turbo also shows significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.108, "currency": "USD" }, "output": { "per_million_tokens": 0.432, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "ernie-x1-turbo", "description": "Wenxin Large Model X1 possesses enhanced abilities in understanding, planning, reflection, and evolution.
As a more comprehensive deep-thinking model, Wenxin X1 combines accuracy, creativity, and literary elegance, excelling particularly in Chinese knowledge Q&A, literary creation, document writing, daily conversations, logical reasoning, complex calculations, and tool invocation.", "capabilities": [ "REASONING", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "context_window": 50500, "max_output_tokens": 28000, "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.544, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "thinking,tools,function_calling,structured_outputs" } }, { "id": "gemini-2.0-flash-exp-search", "description": "The gemini-2.0-flash-exp model supports internet connectivity, but the official version requires additional request parameters to enable it. Aihubmix has integrated this by automatically calling the official API's online functionality when the model name is requested with the \"search\" parameter.", "capabilities": [ "WEB_SEARCH", "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "search", "web", "tools", "function_calling", "structured_outputs", "long_context" ], "original_types": "llm,search", "original_features": "web,tools,function_calling,structured_outputs,long_context" } }, { "id": "kat-dev", "description": "KAT-Dev (32B) is an open-source 32B parameter model specifically designed for software engineering tasks. It achieved a 62.4% resolution rate on the SWE-Bench Verified benchmark, ranking fifth among all open-source models of various scales. The model is optimized through multiple stages, including intermediate training, supervised fine-tuning (SFT) and reinforcement fine-tuning (RFT), as well as large-scale agent reinforcement learning (RL). Based on Qwen3-32B, its training process lays the foundation for subsequent fine-tuning and reinforcement learning stages by enhancing fundamental abilities such as tool usage, multi-turn interaction, and instruction following. During the fine-tuning phase, the model not only learns eight carefully curated task types and programming scenarios but also innovatively introduces a reinforcement fine-tuning (RFT) stage guided by human engineer-annotated “teacher trajectories.” The final agent reinforcement learning phase addresses scalability challenges through multi-level prefix caching, entropy-based trajectory pruning, and efficient architecture.", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.137, "currency": "USD" }, "output": { "per_million_tokens": 0.548, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools" ], "original_types": "llm", "original_features": "tools" } }, { "id": "llama-3.3-70b", "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). 
The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.", "context_window": 65536, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshotai/kimi-dev-72b", "description": "Kimi-Dev-72B is a new-generation open-source programming large model that achieved a leading performance of 60.4% on SWE-bench Verified. Through large-scale reinforcement learning optimization, it can automatically fix code in real Docker environments, receiving rewards only when passing the complete test suite, thereby ensuring the correctness and robustness of solutions and aligning more closely with real software development standards.", "pricing": { "input": { "per_million_tokens": 0.32, "currency": "USD" }, "output": { "per_million_tokens": 1.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshotai/moonlight-16b-a3b-instruct", "description": "Provided by chutes.ai.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o1-global", "description": "OpenAI's new model", "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" }, "cache_read": { "per_million_tokens": 7.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qianfan-qi-vl", "description": "The Qianfan-QI-VL model is a proprietary image quality inspection and visual understanding large model (Quality Inspection Large Vision Language Model, Qianfan-QI-VL) developed by Baidu Cloud’s Qianfan platform. It is designed for quality inspection of product images uploaded in e-commerce scenarios, with detection capabilities including AIGC human defect detection, mosaic recognition, watermark recognition, and trademark detection.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "tencent/hunyuan-a13b-instruct", "description": "Hunyuan-A13B-Instruct has 80 billion total parameters and can match larger models by activating only 13 billion parameters, supporting \"fast thinking/slow thinking\" hybrid inference. It offers stable long text understanding. Verified by BFCL-v3 and τ-Bench, its Agent capabilities are leading in the field.
Combined with GQA and multiple quantization formats, it enables efficient inference.", "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.56, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "unsloth/gemma-3-27b-it", "description": "Google's latest open-source model; provided by chutes.ai", "pricing": { "input": { "per_million_tokens": 0.22, "currency": "USD" }, "output": { "per_million_tokens": 0.22, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "unsloth/gemma-3-12b-it", "description": "Provided by chutes.ai.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-exp-1206", "description": "Google's latest experimental model, currently Google's most powerful model.", "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4o-zh", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen-qwq-32b", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-max-0125", "description": "Qwen 2.5-Max latest model", "pricing": { "input": { "per_million_tokens": 0.38, "currency": "USD" }, "output": { "per_million_tokens": 1.52, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-3-5-haiku", "description": "Claude 3.5 Haiku is the next generation of Claude's fastest model.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 5.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "baai/bge-large-en-v1.5", "description": "BAAI/bge-large-en-v1.5 is a large English text embedding model and part of the BGE (BAAI General Embedding) series. It achieves excellent performance on the MTEB benchmark, with an average score of 64.23 across 56 datasets, excelling in tasks such as retrieval, clustering, and text pair classification. 
The model supports a maximum input length of 512 tokens and is suitable for various natural language processing tasks, such as text retrieval and semantic similarity computation.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.034, "currency": "USD" }, "output": { "per_million_tokens": 0.034, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding", "tools", "function_calling", "structured_outputs" ], "original_types": "embedding", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "baai/bge-large-zh-v1.5", "description": "BAAI/bge-large-zh-v1.5 is a large Chinese text embedding model and part of the BGE (BAAI General Embedding) series. It performs excellently on the C-MTEB benchmark, achieving an average score of 64.53 across 31 datasets, with outstanding results in tasks such as retrieval, semantic similarity, and text pair classification. The model supports a maximum input length of 512 tokens and is suitable for various Chinese natural language processing tasks, such as text retrieval and semantic similarity computation.", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.034, "currency": "USD" }, "output": { "per_million_tokens": 0.034, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding", "tools", "function_calling", "structured_outputs" ], "original_types": "embedding", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "baai/bge-reranker-v2-m3", "description": "BAAI/bge-reranker-v2-m3 is a lightweight multilingual reranking model. It is developed based on the bge-m3 model, offering strong multilingual capabilities, easy deployment, and fast inference. The model takes a query and documents as input and directly outputs similarity scores instead of embedding vectors. It is suitable for multilingual scenarios and performs particularly well in both Chinese and English processing.", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.034, "currency": "USD" }, "output": { "per_million_tokens": 0.034, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "rerank" ], "original_types": "rerank" } }, { "id": "tencent/hunyuan-mt-7b", "description": "Hunyuan-MT-7B is a lightweight translation model with 7 billion parameters, designed to translate source text into target languages. The model supports translation among 33 languages as well as 5 Chinese minority languages. In the WMT25 International Machine Translation Competition, Hunyuan-MT-7B achieved first place in 30 out of 31 language categories it participated in, demonstrating its exceptional translation capabilities. For translation scenarios, Tencent Hunyuan proposed a complete training paradigm from pre-training to supervised fine-tuning, followed by translation reinforcement and ensemble reinforcement, enabling it to achieve industry-leading performance among models of similar scale. 
The model is computationally efficient, easy to deploy, and suitable for various application scenarios.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-lite-preview-02-05", "description": "Gemini 2.0 Flash lightweight version", "pricing": { "input": { "per_million_tokens": 0.075, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "v3", "description": "Fast and high-quality — top image quality in just 11 seconds per piece, with almost no extra time for batch generation.\nFlexible ratios — supports ultra-wide and tall formats like 3:1, 2:1, offering diverse perspectives.\nUnique strengths — outstanding design capabilities in the V3 and V2 series, with powerful text rendering (Chinese support coming soon).\nPrecise local editing — fine-tuned mask control for area redrawing (edit) and easy background replacement (replace-background).", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_2", "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities; supported endpoints: /generate, /remix, /edit.\nThis model is the stable V_2 version, highly recommended for editing.\nUS $0.08 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_2_turbo", "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities; supported endpoints: /generate, /remix, /edit.\nThis model is the fast version of V_2, offering increased speed at the slight expense of quality.\nUS $0.05 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_2a", "description": "The Ideogram AI drawing interface is now live.
This model boasts powerful text-to-image capabilities; supported endpoints: /generate, /remix.\nThis model is a faster, cheaper version of V_2.\nUS $0.04 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_2a_turbo", "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities; supported endpoints: /generate, /remix.\nThis model is the ultra-fast version of V_2, delivering the highest speed while slightly reducing quality.\nUS $0.025 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_1", "description": "V_1 is a text-to-image model in the Ideogram series. It delivers strong text rendering capabilities, high photorealistic image quality, and precise prompt adherence. The model also introduces Magic Prompt, a new feature that automatically refines input prompts to generate more detailed and creative visuals.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "v_1_turbo", "description": "The Ideogram AI drawing interface is now live. This model boasts powerful text-to-image capabilities; supported endpoints: /generate, /remix.\nThis model is the ultra-fast version of the original V_1, offering increased speed at the slight expense of quality.\nUS $0.02 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "doubao-embedding-large-text-240915", "description": "doubao-embedding-large-text-240915\nDoubao Embedding is a semantic vectorization model developed by ByteDance, primarily designed for vector search scenarios.
It supports both Chinese and English languages and has a maximum context length of approximately 4K tokens.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "kimi-thinking-preview", "description": "The latest Kimi model.", "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-plus-latest", "description": "The Qwen series models with balanced capabilities have inference performance and speed between Qwen-Max and Qwen-Turbo, making them suitable for moderately complex tasks. This model is a dynamically updated version, and updates will not be announced in advance. The current version is qwen-plus-2025-04-28. The model adopts tiered pricing.", "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.275, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihubmix-phi-4-reasoning", "description": "Phi-4-Reasoning is a state-of-the-art open-weight reasoning model finetuned from Phi-4 using supervised fine-tuning on a dataset of chain-of-thought traces and reinforcement learning. The supervised fine-tuning dataset includes a blend of synthetic prompts and high-quality filtered data from public domain websites, focused on math, science, and coding skills as well as alignment data for safety and Responsible AI. The goal of this approach was to ensure that small capable models were trained with data focused on high quality and advanced reasoning.", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking" } }, { "id": "text-embedding-v4", "description": "This is the Tongyi Laboratory's multilingual unified text vector model trained based on Qwen3, which significantly improves performance in text retrieval, clustering, and classification compared to version V3; it achieves a 15% to 40% improvement on evaluation tasks such as MTEB multilingual, Chinese-English, and code retrieval; supports user-defined vector dimensions ranging from 64 to 2048.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "qwen-turbo-latest", "description": "The Qwen series model with the fastest speed and lowest cost, suitable for simple tasks. This model is a dynamically updated version, and updates will not be announced in advance.
The model's overall Chinese and English abilities have been significantly improved, human preference alignment has been greatly enhanced, inference capability and complex instruction understanding have been substantially strengthened, performance on difficult tasks is better, and mathematics and coding skills have been significantly improved. The current version is qwen-turbo-2025-04-28.", "pricing": { "input": { "per_million_tokens": 0.046, "currency": "USD" }, "output": { "per_million_tokens": 0.92, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihubmix-phi-4-mini-reasoning", "description": "Phi-4-mini-reasoning is a lightweight open model designed for advanced mathematical reasoning and logic-intensive problem-solving. It is particularly well-suited for tasks such as formal proofs, symbolic computation, and solving multi-step word problems. With its efficient architecture, the model balances high-quality reasoning performance with cost-effective deployment, making it ideal for educational applications, embedded tutoring, and lightweight edge or mobile systems.\n\nPhi-4-mini-reasoning supports a 128K token context length, enabling it to process and reason over long mathematical problems and proofs. Built on synthetic and high-quality math datasets, the model leverages advanced fine-tuning techniques such as supervised fine-tuning and preference modeling to enhance reasoning capabilities. Its training incorporates safety and alignment protocols, ensuring robust and reliable performance across supported use cases.", "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihub-phi-4-multimodal-instruct", "description": "Microsoft's latest model", "input_modalities": [ "TEXT", "VISION", "AUDIO" ], "context_window": 128000, "max_output_tokens": 4000, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.48, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihub-phi-4-mini-instruct", "description": "Microsoft's latest model", "input_modalities": [ "TEXT" ], "context_window": 128000, "max_output_tokens": 4000, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.48, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihub-phi-4", "description": "Phi-4 is a state-of-the-art open model based on a combination of synthetic datasets, curated public domain website data, and acquired academic books and QA datasets. 
The approach aims to ensure that small, efficient models are trained using data focused on high quality and advanced reasoning.", "input_modalities": [ "TEXT" ], "context_window": 16400, "max_output_tokens": 16400, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.48, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-3-opus-20240229", "description": "Claude’s strongest previous-generation model", "pricing": { "input": { "per_million_tokens": 16.5, "currency": "USD" }, "output": { "per_million_tokens": 82.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "dall-e-3", "description": "dall-e-3 is an AI image generation model that converts natural language prompts into realistic visuals and artistic content. It delivers accurate semantic understanding, supports customizable output resolutions, and produces high-quality images across a wide range of styles, making it well-suited for concept design, creative prototyping, and professional content workflows.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 40, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "doubao-embedding-text-240715", "description": "Doubao Embedding is a semantic vectorization model developed by ByteDance, primarily designed for vector search scenarios. It supports both Chinese and English languages and has a maximum context length of approximately 4K tokens.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 0.7, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "grok-3-fast", "pricing": { "input": { "per_million_tokens": 5.5, "currency": "USD" }, "output": { "per_million_tokens": 27.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen3-4b", "description": "Achieves effective integration of thinking and non-thinking modes, allowing mode switching during conversations. Its reasoning ability reaches state-of-the-art (SOTA) levels among models of the same scale, with significantly enhanced human preference alignment. 
There are notable improvements in creative writing, role-playing, multi-turn dialogue, and instruction following, resulting in a noticeably better user experience.", "pricing": { "input": { "per_million_tokens": 0.046, "currency": "USD" }, "output": { "per_million_tokens": 0.46, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "deepseek-ai/deepseek-r1-zero", "description": "Open deployment by chutes.ai with FP8 inference; Zero is the preliminary, unoptimized version of R1 and is not recommended except for research purposes.", "pricing": { "input": { "per_million_tokens": 2.2, "currency": "USD" }, "output": { "per_million_tokens": 2.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "grok-3-fast-beta", "pricing": { "input": { "per_million_tokens": 5.5, "currency": "USD" }, "output": { "per_million_tokens": 27.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen3-1.7b", "description": "Effectively integrates thinking and non-thinking modes, allowing mode switching during conversations. Its general capabilities significantly surpass those of the Qwen2.5 small-scale series, with greatly enhanced human preference alignment. There are notable improvements in creative writing, role-playing, multi-turn dialogue, and instruction following, resulting in a significantly better user experience.", "pricing": { "input": { "per_million_tokens": 0.046, "currency": "USD" }, "output": { "per_million_tokens": 0.46, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen3-0.6b", "description": "Effectively integrates thinking and non-thinking modes, allowing mode switching during conversations. Its general capabilities significantly surpass those of the Qwen2.5 small-scale series.", "pricing": { "input": { "per_million_tokens": 0.046, "currency": "USD" }, "output": { "per_million_tokens": 0.46, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen-turbo-2025-04-28", "description": "The Qwen3 series Turbo model effectively integrates thinking and non-thinking modes, allowing seamless switching between modes during conversations. With a smaller parameter size, its reasoning ability rivals that of QwQ-32B, and its general capabilities significantly surpass those of Qwen2.5-Turbo, reaching state-of-the-art (SOTA) levels among models of the same scale. 
This version is a snapshot model as of April 28, 2025.", "pricing": { "input": { "per_million_tokens": 0.046, "currency": "USD" }, "output": { "per_million_tokens": 0.92, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "grok-3-mini-fast-beta", "pricing": { "input": { "per_million_tokens": 0.33, "currency": "USD" }, "output": { "per_million_tokens": 2.20011, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-3-32b", "description": "cerebras", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "command-a-03-2025", "description": "Command A is Cohere's most performant model to date, excelling at tool use, agents, retrieval augmented generation (RAG), and multilingual use cases. Command A has a context length of 256K, only requires two GPUs to run, and has 150% higher throughput compared to Command R+ 08-2024.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "qwen-plus-2025-04-28", "description": "The Qwen3 series Plus model effectively integrates thinking and non-thinking modes, allowing for mode switching during conversations. Its reasoning abilities significantly surpass those of QwQ, and its general capabilities are markedly superior to Qwen2.5-Plus, reaching state-of-the-art (SOTA) levels among models of the same scale. This version is a snapshot model as of April 28, 2025.", "pricing": { "input": { "per_million_tokens": 0.13, "currency": "USD" }, "output": { "per_million_tokens": 2.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "thudm/glm-z1-32b-0414", "description": "GLM-Z1-32B-0414 is a reasoning-focused AI model built on GLM-4-32B-0414. It has been enhanced through cold-start methods and reinforcement learning, with a strong emphasis on math, coding, and logic tasks. Despite having only 32B parameters, it performs comparably to the 671B DeepSeek-R1 on some benchmarks. It excels in complex reasoning tasks, as shown in evaluations like AIME 24/25, LiveCodeBench, and GPQA.", "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "pro/thudm/glm-4.1v-9b-thinking", "description": "GLM-4.1V-9B-Thinking is an open-source Vision Language Model (VLM) jointly released by Zhipu AI and the KEG Laboratory at Tsinghua University, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the “Chain-of-Thought” reasoning mechanism and using reinforcement learning strategies. As a lightweight model with 9 billion parameters, it strikes a balance between deployment efficiency and performance. In 28 authoritative benchmark evaluations, it matched or even outperformed the 72-billion-parameter Qwen-2.5-VL-72B model in 18 tasks. 
The model excels not only in image-text understanding, mathematical and scientific reasoning, and video understanding, but also supports images up to 4K resolution and inputs of arbitrary aspect ratios.", "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "thudm/glm-4.1v-9b-thinking", "description": "GLM-4.1V-9B-Thinking is an open-source Vision Language Model (VLM) jointly released by Zhipu AI and the KEG Laboratory at Tsinghua University, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the “Chain-of-Thought” reasoning mechanism and using reinforcement learning strategies. As a lightweight model with 9 billion parameters, it strikes a balance between deployment efficiency and performance. In 28 authoritative benchmark evaluations, it matched or even outperformed the 72-billion-parameter Qwen-2.5-VL-72B model in 18 tasks. The model excels not only in image-text understanding, mathematical and scientific reasoning, and video understanding, but also supports images up to 4K resolution and inputs of arbitrary aspect ratios.", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-embedding-004", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "thudm/glm-4-32b-0414", "description": "GLM-4-32B-0414 is a next-generation open-source model with 32 billion parameters, delivering performance comparable to OpenAI’s GPT series and DeepSeek V3/R1. It supports smooth local deployment.\n\nThe base model was pre-trained on 15T of high-quality data, including a large amount of reasoning-focused synthetic content, setting the stage for advanced reinforcement learning.\n\nIn the post-training phase, techniques like human preference alignment, rejection sampling, and reinforcement learning were used to improve the model’s ability to follow instructions, generate code, and handle function calls—core skills needed for agent-style tasks.\n\nGLM-4-32B-0414 has shown strong results in engineering code, artifact generation, function calling, search-based QA, and report writing—sometimes matching or even surpassing larger models like GPT-4o and DeepSeek-V3 (671B) on specific benchmarks.", "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "thudm/glm-z1-9b-0414", "description": "GLM-Z1-9B-0414 is a small but powerful model in the GLM series, with only 9 billion parameters. 
Despite its size, it delivers strong performance in math reasoning and general tasks, ranking among the best in its class of open-source models.\n\nTrained with the same techniques as larger models, it strikes an excellent balance between performance and efficiency—making it a great option for low-resource or lightweight deployment scenarios.", "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "thudm/glm-4-9b-0414", "description": "GLM-4-9B-0414 is a lightweight model in the GLM family, with 9 billion parameters. It inherits the core tech from GLM-4-32B and offers an efficient option for deployment on limited resources.\n\nDespite its smaller size, it performs well in tasks like code generation, web design, SVG graphics creation, and search-based writing. It also supports function calling to interact with external tools, enhancing its versatility.\n\nGLM-4-9B-0414 strikes a solid balance between efficiency and performance, making it a strong choice for low-resource environments—while remaining competitive on various benchmarks.", "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cc-doubao-seed-code-preview-latest", "description": "For Claude Code only", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-seed-code-preview-latest", "description": "chat", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/janus-pro-7b", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-zero-preview", "description": "Simply put, it is an intelligence-enhanced version of o1.", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-3-235b-a22b-instruct-2507", "description": "cerebras", "pricing": { "input": { "per_million_tokens": 0.28, "currency": "USD" }, "output": { "per_million_tokens": 1.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-thinking-exp-1219", "description": "The Gemini 2.0 Flash Thinking mode is an experimental model designed to generate the \"thinking process\" that the model undergoes during its response. 
As a result, the Gemini 2.0 Flash Thinking mode shows stronger reasoning in its responses than the base Gemini 2.0 Flash model.", "pricing": { "input": { "per_million_tokens": 0.076, "currency": "USD" }, "output": { "per_million_tokens": 0.304, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "nvidia/llama-3_1-nemotron-ultra-253b-v1", "description": "Llama-3.1-Nemotron-Ultra-253B is a 253 billion parameter reasoning-focused language model optimized for efficiency that excels at math, coding, and general instruction-following tasks while running on a single 8xH100 node.", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-32k", "description": "The smartest version of GPT-4; OpenAI no longer offers it officially. All 32k versions on this site are provided by Microsoft's official Azure OpenAI deployment.", "pricing": { "input": { "per_million_tokens": 60, "currency": "USD" }, "output": { "per_million_tokens": 120, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o1-preview-2024-09-12", "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" }, "cache_read": { "per_million_tokens": 7.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "coding-glm-4.5-air", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.014, "currency": "USD" }, "output": { "per_million_tokens": 0.084, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qvq-72b-preview", "pricing": { "input": { "per_million_tokens": 1.2, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwq-32b-preview", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-sonar-huge-128k-online", "description": "On February 22, 2025, this model will be officially discontinued. 
Perplexity AI's official fine-tuned Llama online interface is currently supported only at the api.aihubmix.com address.", "pricing": { "input": { "per_million_tokens": 5.6, "currency": "USD" }, "output": { "per_million_tokens": 5.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-sonar-large-128k-online", "description": "On February 22, 2025, this model will be officially discontinued; Perplexity AI's official fine-tuned Llama online interface is currently supported only at the api.aihubmix.com address.", "pricing": { "input": { "per_million_tokens": 1.2, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-mistral-large-2411", "description": "The latest Mistral Large 2 model is deployed on Azure.", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-mistral-large-2407", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "grok-2-1212", "pricing": { "input": { "per_million_tokens": 1.8, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4.5-flash", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0, "currency": "USD" }, "output": { "per_million_tokens": 0, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-image-test", "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "imagen-3.0-generate-002", "description": "Imagen 3.0 is Google's latest text-to-image generation model, capable of creating high-quality images from natural language prompts. Compared to its predecessors, Imagen 3.0 offers significant improvements in detail, lighting, and reduced visual artifacts. 
It supports rendering in various artistic styles, from photorealism to impressionism, as well as abstract and anime styles.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "llama3.1-8b", "description": "cerebras", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o1-2024-12-17", "capabilities": [ "REASONING" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" }, "cache_read": { "per_million_tokens": 7.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "thinking" ], "original_types": "llm", "original_features": "thinking" } }, { "id": "sf-kimi-k2-thinking", "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "describe", "description": "This endpoint is used to describe an image.\nSupported image formats include JPEG, PNG, and WebP.\nUS$0.01 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "upscale", "description": "The super-resolution upscale interface of the Ideogram AI drawing model is designed to enlarge low-resolution images into high-resolution ones, redrawing details (with controllable similarity and detail proportions).\nUS$0.06 per image.\nFor usage examples and pricing details, refer to the documentation at https://docs.aihubmix.com/cn/api/IdeogramAI.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "bai-qwen3-vl-235b-a22b-instruct", "description": "The Qwen3 series open-source models include hybrid models, thinking models, and non-thinking models, with both reasoning capabilities and general abilities reaching industry SOTA levels at the same scale.", "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 1.096, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "cc-minimax-m2", "description": "For Claude Code only", 
"capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "cc-deepseek-v3", "description": "For Claude code only", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cc-deepseek-v3.1", "description": "For Claude code only", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.56, "currency": "USD" }, "output": { "per_million_tokens": 1.68, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "cc-ernie-4.5-300b-a47b", "description": "For Claude code only", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.32, "currency": "USD" }, "output": { "per_million_tokens": 1.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "cc-kimi-dev-72b", "description": "For Claude code only", "pricing": { "input": { "per_million_tokens": 0.32, "currency": "USD" }, "output": { "per_million_tokens": 1.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cc-kimi-k2-instruct", "description": "For Claude code only", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 3.3, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "cc-kimi-k2-instruct-0905", "description": "For Claude code only", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 3.3, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "cc-kimi-k2-thinking", "description": "Dedicated for Claude Code", "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": { "per_million_tokens": 2.192, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "computer-use-preview", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-thinking-exp", "pricing": { "input": { 
"per_million_tokens": 0.076, "currency": "USD" }, "output": { "per_million_tokens": 0.304, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "baichuan3-turbo", "pricing": { "input": { "per_million_tokens": 1.9, "currency": "USD" }, "output": { "per_million_tokens": 1.9, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "baichuan3-turbo-128k", "pricing": { "input": { "per_million_tokens": 3.8, "currency": "USD" }, "output": { "per_million_tokens": 3.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "baichuan4", "pricing": { "input": { "per_million_tokens": 16, "currency": "USD" }, "output": { "per_million_tokens": 16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "baichuan4-air", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "baichuan4-turbo", "pricing": { "input": { "per_million_tokens": 2.4, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-v3", "pricing": { "input": { "per_million_tokens": 0.272, "currency": "USD" }, "output": { "per_million_tokens": 1.088, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1.5-lite-32k", "description": "Doubao-1.5-lite, a brand-new generation of lightweight model, offers exceptional response speed with both performance and latency reaching world-class levels. It supports a 32k context window and an output length of up to 12k tokens.", "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.01, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1.5-pro-256k", "description": "Doubao-1.5-pro-256k, a fully upgraded version based on Doubao-1.5-Pro, delivers an overall performance improvement of 10%. It supports inference with a 256k context window and an output length of up to 12k tokens. With higher performance, larger window size, and exceptional cost-effectiveness, it is suitable for a wider range of application scenarios.", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.44, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1.5-pro-32k", "description": "Doubao-1.5-pro, a brand-new generation of flagship model, features comprehensive performance upgrades and excels in knowledge, coding, reasoning, and other aspects. It supports a 32k context window and an output length of up to 12k tokens.", "pricing": { "input": { "per_million_tokens": 0.134, "currency": "USD" }, "output": { "per_million_tokens": 0.335, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0268, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1.5-vision-pro-32k", "description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detailed information understanding, and instruction-following capabilities. 
It supports a 32k context window and an output length of up to 12k tokens.", "pricing": { "input": { "per_million_tokens": 0.46, "currency": "USD" }, "output": { "per_million_tokens": 1.38, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-lite-128k", "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.28, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-lite-32k", "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.012, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-lite-4k", "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-pro-128k", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.44, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-pro-256k", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.44, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-pro-32k", "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.35, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.028, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-pro-4k", "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.35, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gryphe/mythomax-l2-13b", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "minimax-text-01", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 1.12, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "long_context" ], "original_features": "long_context" } }, { "id": "qwen/qwen2-1.5b-instruct", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2-57b-a14b-instruct", "pricing": { "input": { "per_million_tokens": 0.24, "currency": "USD" }, "output": { "per_million_tokens": 0.24, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2-72b-instruct", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2-7b-instruct", "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2.5-32b-instruct", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" 
} }, { "id": "qwen/qwen2.5-72b-instruct", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2.5-72b-instruct-128k", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2.5-7b-instruct", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen/qwen2.5-coder-32b-instruct", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "stable-diffusion-3-5-large", "description": "Stable Diffusion 3.5 Large, developed by Stability AI, is a text-to-image generation model that supports high-quality image creation with excellent prompt responsiveness and customization, suitable for professional applications.", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "wizardlm/wizardcoder-python-34b-v1.0", "pricing": { "input": { "per_million_tokens": 0.9, "currency": "USD" }, "output": { "per_million_tokens": 0.9, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ahm-phi-3-5-moe-instruct", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ahm-phi-3-5-mini-instruct", "description": "Phi-3.5-mini is a lightweight, state-of-the-art open model built upon the dataset used for Phi-3—which includes synthetic data and carefully curated publicly available websites—focusing on very high-quality, reasoning-intensive data. 
This model is part of the Phi-3 model family and supports a context length of 128K tokens.", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ahm-phi-3-5-vision-instruct", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "ahm-phi-3-medium-128k", "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 18, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ahm-phi-3-medium-4k", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ahm-phi-3-small-128k", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-codestral-2501", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-cohere-command-r", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.64, "currency": "USD" }, "output": { "per_million_tokens": 1.92, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihubmix-jamba-1-5-large", "pricing": { "input": { "per_million_tokens": 2.2, "currency": "USD" }, "output": { "per_million_tokens": 8.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-1-405b-instruct", "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-1-70b-instruct", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.78, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-1-8b-instruct", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-2-11b-vision", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-2-90b-vision", "pricing": { "input": { "per_million_tokens": 2.4, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-llama-3-70b-instruct", "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 0.7, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-mistral-large", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aihubmix-command-r-08-2024", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 
0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihubmix-command-r-plus", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 3.84, "currency": "USD" }, "output": { "per_million_tokens": 19.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "aihubmix-command-r-plus-08-2024", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 2.8, "currency": "USD" }, "output": { "per_million_tokens": 11.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "baidu-deepseek-v3.2", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.411, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "baidu-deepseek-v3.2-exp", "capabilities": [ "FUNCTION_CALL", "STRUCTURED_OUTPUT" ], "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.411, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.0274, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "tools", "function_calling", "structured_outputs" ], "original_types": "llm", "original_features": "tools,function_calling,structured_outputs" } }, { "id": "chatglm_lite", "pricing": { "input": { "per_million_tokens": 0.2858, "currency": "USD" }, "output": { "per_million_tokens": 0.2858, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "chatglm_pro", "pricing": { "input": { "per_million_tokens": 1.4286, "currency": "USD" }, "output": { "per_million_tokens": 1.4286, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "chatglm_std", "pricing": { "input": { "per_million_tokens": 0.7144, "currency": "USD" }, "output": { "per_million_tokens": 0.7144, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "chatglm_turbo", "pricing": { "input": { "per_million_tokens": 0.7144, "currency": "USD" }, "output": { "per_million_tokens": 0.7144, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-2", "pricing": { "input": { "per_million_tokens": 8.8, "currency": "USD" }, "output": { "per_million_tokens": 8.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-2.0", "pricing": { "input": { "per_million_tokens": 8.8, "currency": "USD" }, "output": { "per_million_tokens": 39.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-2.1", "pricing": { "input": { "per_million_tokens": 8.8, "currency": "USD" }, "output": { "per_million_tokens": 39.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-3-5-sonnet-20240620", "description": "Claude 3.5 Sonnet delivers performance superior to Opus and speeds faster than its predecessor, all at the same price point. 
Its core strengths include:\n\nCoding: Autonomously writes, edits, and executes code with advanced reasoning and troubleshooting.\nData Science: Augments human expertise by analyzing unstructured data and using multiple tools to generate insights.\nVisual Processing: Excels at interpreting charts, graphs, and images, accurately transcribing text to derive high-level insights.\nAgentic Tasks: Exceptional tool use makes it highly effective for complex, multi-step agentic workflows that interact with other systems.", "input_modalities": [ "TEXT", "VISION" ], "context_window": 200000, "max_output_tokens": 8192, "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-3-5-sonnet@20240620", "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-3-haiku-20240229", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.275, "currency": "USD" }, "output": { "per_million_tokens": 0.275, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-3-haiku-20240307", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.275, "currency": "USD" }, "output": { "per_million_tokens": 1.375, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-3-haiku@20240307", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 0.275, "currency": "USD" }, "output": { "per_million_tokens": 1.375, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-3-opus@20240229", "pricing": { "input": { "per_million_tokens": 16.5, "currency": "USD" }, "output": { "per_million_tokens": 82.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-3-sonnet-20240229", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 3.3, "currency": "USD" }, "output": { "per_million_tokens": 16.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "claude-instant-1", "pricing": { "input": { "per_million_tokens": 1.793, "currency": "USD" }, "output": { "per_million_tokens": 1.793, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "claude-instant-1.2", "pricing": { "input": { "per_million_tokens": 0.88, "currency": "USD" }, "output": { "per_million_tokens": 3.96, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "code-davinci-edit-001", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cogview-3", "pricing": { "input": { "per_million_tokens": 35.5, "currency": "USD" }, "output": { "per_million_tokens": 35.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "cogview-3-plus", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "command", "input_modalities": [ "TEXT" ], "pricing": { "input": { 
"per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "command-light", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "command-light-nightly", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "command-nightly", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "command-r", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.64, "currency": "USD" }, "output": { "per_million_tokens": 1.92, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "command-r-plus", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 3.84, "currency": "USD" }, "output": { "per_million_tokens": 19.2, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "dall-e-2", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], "output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 16, "currency": "USD" }, "output": { "per_million_tokens": 16, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "davinci", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "davinci-002", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-coder-v2-instruct", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.32, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-llama-70b", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-llama-8b", "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0.01, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-qwen-1.5b", "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0.01, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-qwen-14b", "description": "Open source deployment from SiliconFlow, the model itself is obtained through knowledge distillation.", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-qwen-32b", "description": "Open source deployment from SiliconFlow, the model itself is obtained through knowledge 
distillation.", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-r1-distill-qwen-7b", "description": "Open source deployment from SiliconFlow, the model itself is obtained through knowledge distillation.", "pricing": { "input": { "per_million_tokens": 0.01, "currency": "USD" }, "output": { "per_million_tokens": 0.01, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-v2-chat", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.32, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-v2.5", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.32, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-llm-67b-chat", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-ai/deepseek-vl2", "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-v3", "pricing": { "input": { "per_million_tokens": 0.272, "currency": "USD" }, "output": { "per_million_tokens": 1.088, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "distil-whisper-large-v3-en", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 5.556, "currency": "USD" }, "output": { "per_million_tokens": 5.556, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "stt" ], "original_types": "stt" } }, { "id": "doubao-1-5-thinking-vision-pro-250428", "description": "Deep Thinking \nImage Understanding \nVisual Localization \nVideo Understanding \nTool Invocation \nStructured Output", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-exp-image-generation", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-2.0-flash-lite", "description": "Gemini-2.0-flash Lightweight Official Version", "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 0.076, "currency": "USD" }, "output": { "per_million_tokens": 0.304, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.076, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "long_context" ], "original_types": "llm", "original_features": "long_context" } }, { "id": "gemini-2.5-pro-exp-03-25", "description": "Google’s latest experimental model, highly unstable, for experience only.\nIt boasts strong reasoning and coding capabilities, able to \"think\" before responding, enhancing performance and accuracy in complex tasks. 
It supports multimodal inputs (text, audio, images, video) and a 1 million token context window, suitable for advanced programming, math, and science tasks.\n\nThis means Gemini 2.5 can handle more complex problems in coding, science and math, and support more context-aware agents.", "capabilities": [ "STRUCTURED_OUTPUT", "FUNCTION_CALL" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.31, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "structured_outputs", "tools", "long_context" ], "original_types": "llm", "original_features": "structured_outputs,tools,long_context" } }, { "id": "gemini-embedding-exp-03-07", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "gemini-exp-1114", "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-exp-1121", "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-pro", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemini-pro-vision", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemma-7b-it", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gemma2-9b-it", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-3-turbo", "pricing": { "input": { "per_million_tokens": 0.71, "currency": "USD" }, "output": { "per_million_tokens": 0.71, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4", "pricing": { "input": { "per_million_tokens": 14.2, "currency": "USD" }, "output": { "per_million_tokens": 14.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4-flash", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4-plus", "pricing": { "input": { "per_million_tokens": 8, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4.5-airx", "description": "GLM-4.5-AirX is the high-speed version of GLM-4.5-Air, offering faster response times; it is designed for large-scale, speed-critical workloads.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.51, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.22, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4v", "pricing": { 
"input": { "per_million_tokens": 14.2, "currency": "USD" }, "output": { "per_million_tokens": 14.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "glm-4v-plus", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "google/gemini-exp-1114", "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "google/gemma-2-9b-it:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-3.5-turbo-0125", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-3.5-turbo-0301", "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-3.5-turbo-1106", "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-3.5-turbo-16k-0613", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-0125-preview", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-0613", "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-32k-0314", "pricing": { "input": { "per_million_tokens": 60, "currency": "USD" }, "output": { "per_million_tokens": 120, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-32k-0613", "pricing": { "input": { "per_million_tokens": 60, "currency": "USD" }, "output": { "per_million_tokens": 120, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-turbo-2024-04-09", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4-vision-preview", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "grok-2-vision-1212", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 1.8, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "grok-vision-beta", "input_modalities": [ "TEXT", "VISION" ], "pricing": { "input": { "per_million_tokens": 5.6, "currency": "USD" }, "output": { "per_million_tokens": 16.8, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "imagen-4.0-generate-preview-05-20", "description": "Google's latest raw image model", "capabilities": [ "IMAGE_GENERATION" ], "input_modalities": [ "TEXT", "VISION" ], 
"output_modalities": [ "VISION" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "image_generation" ], "category": "image-generation", "original_types": "image_generation" } }, { "id": "jina-embeddings-v2-base-code", "description": "Model optimized for code and document search, 768-dimensional, 137M parameters.", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "learnlm-1.5-pro-experimental", "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-405b-reasoning", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-70b", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-70b-versatile", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-8b-instant", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.1-sonar-small-128k-online", "description": "On February 22, 2025, this model will be officially discontinued. 
Perplexity AI's official fine-tuned Llama online interface is currently supported only at the api.aihubmix.com address.", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.2-11b-vision-preview", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.2-1b-preview", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.2-3b-preview", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama-3.2-90b-vision-preview", "pricing": { "input": { "per_million_tokens": 2.4, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama2-70b-4096", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm" ], "original_types": "llm" } }, { "id": "llama2-7b-2048", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-70b-8192", "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 0.937288, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-70b-8192(33)", "pricing": { "input": { "per_million_tokens": 2.65, "currency": "USD" }, "output": { "per_million_tokens": 2.65, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-8b-8192", "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-8b-8192(33)", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-groq-70b-8192-tool-use-preview", "pricing": { "input": { "per_million_tokens": 0.00089, "currency": "USD" }, "output": { "per_million_tokens": 0.00089, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "llama3-groq-8b-8192-tool-use-preview", "pricing": { "input": { "per_million_tokens": 0.00019, "currency": "USD" }, "output": { "per_million_tokens": 0.00019, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama/llama-3.2-90b-vision-instruct", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama/llama-3.1-405b-instruct:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama/llama-3.1-70b-instruct:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id":
"meta-llama/llama-3.1-8b-instruct:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama/llama-3.2-11b-vision-instruct:free", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta/llama-3.1-405b-instruct", "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta/llama3-8b-chat", "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-128k", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-128k-vision-preview", "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-32k", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-32k-vision-preview", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-8k", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "moonshot-v1-8k-vision-preview", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o1-mini-2024-09-12", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "omni-moderation-latest", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-flash", "description": "The model adopts tiered pricing.", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-flash-2025-07-28", "description": "The model adopts tiered pricing.", "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-long", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-max-longcontext", "pricing": { "input": { "per_million_tokens": 7, "currency": "USD" }, "output": { "per_million_tokens": 21, "currency": 
"USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen-turbo-2024-11-01", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.36, "currency": "USD" }, "output": { "per_million_tokens": 1.08, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "llm", "long_context" ], "original_types": "llm", "original_features": "long_context" } }, { "id": "qwen2.5-14b-instruct", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-32b-instruct", "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-3b-instruct", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-72b-instruct", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-7b-instruct", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-coder-1.5b-instruct", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-math-1.5b-instruct", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-math-72b-instruct", "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 2.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qwen2.5-math-7b-instruct", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "step-2-16k", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-ada-001", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-babbage-001", "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-curie-001", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-davinci-002", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-davinci-003", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-davinci-edit-001", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, 
"output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-embedding-v1", "input_modalities": [ "TEXT" ], "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "embedding" ], "original_types": "embedding" } }, { "id": "text-moderation-007", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-moderation-latest", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-moderation-stable", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "text-search-ada-doc-001", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 20, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "tts-1", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "tts" ], "original_types": "tts" } }, { "id": "tts-1-1106", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "tts" ], "original_types": "tts" } }, { "id": "tts-1-hd", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "tts" ], "original_types": "tts" } }, { "id": "tts-1-hd-1106", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "tts" ], "original_types": "tts" } }, { "id": "veo-3", "description": "veo3 reverse access with a total cost of just $0.41 per video generation., OpenAI chat port compatible format.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "veo3", "description": "veo3 reverse access with a total cost of just $0.41 per video generation., OpenAI chat port compatible format.\nNote that this is a reverse interface, and charges are based on the number of requests. As long as a request is initiated, even if it returns a failure, you will be charged. 
If you cannot accept this, please do not use it.", "capabilities": [ "VIDEO_GENERATION" ], "input_modalities": [ "TEXT", "VISION", "AUDIO", "VIDEO" ], "output_modalities": [ "VIDEO" ], "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" }, "cache_read": { "per_million_tokens": 0, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "video" ], "category": "video-generation", "original_types": "video" } }, { "id": "whisper-1", "description": "Ignore the displayed price on the page; the actual charge for this model request is consistent with the official, so you can use it with confidence.", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 100, "currency": "USD" }, "output": { "per_million_tokens": 100, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "stt" ], "original_types": "stt" } }, { "id": "whisper-large-v3", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 30.834, "currency": "USD" }, "output": { "per_million_tokens": 30.834, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "stt" ], "original_types": "stt" } }, { "id": "whisper-large-v3-turbo", "input_modalities": [ "AUDIO" ], "pricing": { "input": { "per_million_tokens": 5.556, "currency": "USD" }, "output": { "per_million_tokens": 5.556, "currency": "USD" } }, "metadata": { "source": "aihubmix", "tags": [ "stt" ], "original_types": "stt" } }, { "id": "yi-large", "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "yi-large-rag", "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "yi-large-turbo", "pricing": { "input": { "per_million_tokens": 1.8, "currency": "USD" }, "output": { "per_million_tokens": 1.8, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "yi-lightning", "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "yi-medium", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "yi-vl-plus", "pricing": { "input": { "per_million_tokens": 0.000852, "currency": "USD" }, "output": { "per_million_tokens": 0.000852, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4o-2024-08-06-global", "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" }, "cache_read": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "gpt-4o-mini-global", "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.075, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama-3-70b", "pricing": { "input": { "per_million_tokens": 4.795, "currency": "USD" }, "output": { "per_million_tokens": 4.795, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "meta-llama-3-8b", "pricing": { "input": { "per_million_tokens": 0.548, "currency": "USD" }, "output": 
{ "per_million_tokens": 0.548, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o3-global", "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o3-mini-global", "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.55, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "o3-pro-global", "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 80, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qianfan-chinese-llama-2-13b", "pricing": { "input": { "per_million_tokens": 0.822, "currency": "USD" }, "output": { "per_million_tokens": 0.822, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "qianfan-llama-vl-8b", "pricing": { "input": { "per_million_tokens": 0.274, "currency": "USD" }, "output": { "per_million_tokens": 0.685, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aistudio_gemini-2.0-flash", "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "aistudio_gpt-4.1-mini", "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.6, "currency": "USD" }, "cache_read": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "deepseek-r1-distill-qianfan-llama-8b", "pricing": { "input": { "per_million_tokens": 0.137, "currency": "USD" }, "output": { "per_million_tokens": 0.548, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1-5-pro-256k-250115", "pricing": { "input": { "per_million_tokens": 0.684, "currency": "USD" }, "output": { "per_million_tokens": 1.2312, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "doubao-1-5-pro-32k-250115", "pricing": { "input": { "per_million_tokens": 0.108, "currency": "USD" }, "output": { "per_million_tokens": 0.27, "currency": "USD" } }, "metadata": { "source": "aihubmix" } }, { "id": "ai21/jamba-large-1.7", "name": "AI21: Jamba Large 1.7", "description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context window, it delivers more accurate, contextually grounded responses and better steerability than previous versions.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754669020 } }, { "id": "ai21/jamba-mini-1.7", "name": "AI21: Jamba Mini 1.7", "description": "Jamba Mini 1.7 is a compact and efficient member of the Jamba open model family, incorporating key improvements in grounding and instruction-following while maintaining the benefits of the SSM-Transformer hybrid architecture and 256K context window. 
Despite its compact size, it delivers accurate, contextually grounded responses and improved steerability.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754670601 } }, { "id": "aion-labs/aion-1.0", "name": "AionLabs: Aion-1.0", "description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree of Thoughts (ToT) and Mixture of Experts (MoE). It is Aion Lab's most powerful reasoning model.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738697557 } }, { "id": "aion-labs/aion-1.0-mini", "name": "AionLabs: Aion-1.0-Mini", "description": "Aion-1.0-Mini is a 32B-parameter distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant of a FuseAI model that outperforms R1-Distill-Qwen-32B and R1-Distill-Llama-70B, with benchmark results available on its [Hugging Face page](https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview), independently replicated for verification.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.7, "currency": "USD" }, "output": { "per_million_tokens": 1.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738697107 } }, { "id": "aion-labs/aion-rp-llama-3.1-8b", "name": "AionLabs: Aion-RP 1.0 (8B)", "description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses.
It is a fine-tuned base model rather than an instruct model, designed to produce more natural and varied writing.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738696718 } }, { "id": "alfredpros/codellama-7b-instruct-solidity", "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "description": "A 7-billion-parameter Code LLaMA Instruct model fine-tuned to generate Solidity smart contracts, using 4-bit QLoRA fine-tuning provided by the PEFT library.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744641874 } }, { "id": "alibaba/tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.09, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758210804 } }, { "id": "allenai/olmo-2-0325-32b-instruct", "name": "AllenAI: Olmo 2 32B Instruct", "description": "OLMo-2 32B Instruct is a supervised instruction-finetuned variant of the OLMo-2 32B March 2025 base model. It excels in complex reasoning and instruction-following tasks across diverse benchmarks such as GSM8K, MATH, IFEval, and general NLP evaluation.
Developed by AI2, OLMo-2 32B is part of an open, research-oriented initiative, trained primarily on English-language datasets to advance the understanding and development of open-source language models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741988556 } }, { "id": "allenai/olmo-3-32b-think", "name": "AllenAI: Olmo 3 32B Think", "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.55, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763758276 } }, { "id": "allenai/olmo-3-7b-instruct", "name": "AllenAI: Olmo 3 7B Instruct", "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763758273 } }, { "id": "allenai/olmo-3-7b-think", "name": "AllenAI: Olmo 3 7B Think", "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763758270 } }, { "id": "alpindale/goliath-120b", "name": "Goliath 120B", "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model.
Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 6144, "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1699574400 } }, { "id": "amazon/nova-lite-v1", "name": "Amazon: Nova Lite 1.0", "description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focuses on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite can handle real-time customer interactions, document analysis, and visual question-answering tasks with high accuracy.\n\nWith an input context of 300K tokens, it can analyze multiple images or up to 30 minutes of video in a single input.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 300000, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.24, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1733437363 } }, { "id": "amazon/nova-micro-v1", "name": "Amazon: Nova Micro 1.0", "description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length of 128K tokens and optimized for speed and cost, Amazon Nova Micro excels at tasks such as text summarization, translation, content classification, interactive chat, and brainstorming. It has simple mathematical reasoning and coding abilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.035, "currency": "USD" }, "output": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1733437237 } }, { "id": "amazon/nova-premier-v1", "name": "Amazon: Nova Premier 1.0", "description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 12.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761950332 } }, { "id": "amazon/nova-pro-v1", "name": "Amazon: Nova Pro 1.0", "description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks.
As of December 2024, it achieves state-of-the-art performance on key benchmarks including visual question answering (TextVQA) and video understanding (VATEX).\n\nAmazon Nova Pro demonstrates strong capabilities in processing both visual and textual information and in analyzing financial documents.\n\n**NOTE**: Video input is not supported at this time.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 300000, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 3.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1733436303 } }, { "id": "anthracite-org/magnum-v4-72b", "name": "Magnum v4 72B", "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet (https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus (https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1729555200 } }, { "id": "anthropic/claude-3-haiku", "name": "Anthropic: Claude 3 Haiku", "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1710288000 } }, { "id": "anthropic/claude-3-opus", "name": "Anthropic: Claude 3 Opus", "description": "Claude 3 Opus is Anthropic's most powerful model for highly complex tasks. It boasts top-level performance, intelligence, fluency, and understanding.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-family)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1709596800 } }, { "id": "anthropic/claude-3.5-haiku", "name": "Anthropic: Claude 3.5 Haiku", "description": "Claude 3.5 Haiku offers enhanced capabilities in speed, coding accuracy, and tool use.
Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1730678400 } }, { "id": "anthropic/claude-3.5-haiku-20241022", "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1730678400 } }, { "id": "anthropic/claude-3.5-sonnet", "name": "Anthropic: Claude 3.5 Sonnet", "description": "New Claude 3.5 Sonnet delivers better-than-Opus capabilities, faster-than-Sonnet speeds, at the same Sonnet prices. Sonnet is particularly good at:\n\n- Coding: Scores ~49% on SWE-Bench Verified, higher than the last best score, and without any fancy prompt scaffolding\n- Data science: Augments human data science expertise; navigates unstructured data while using multiple tools for insights\n- Visual processing: excelling at interpreting charts, graphs, and images, accurately transcribing text to derive insights beyond just the text alone\n- Agentic tasks: exceptional tool use, making it great at agentic tasks (i.e. complex, multi-step problem solving tasks that require engaging with other systems)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1729555200 } }, { "id": "anthropic/claude-3.7-sonnet", "name": "Anthropic: Claude 3.7 Sonnet", "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks.
The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1740422110 } }, { "id": "anthropic/claude-3.7-sonnet:thinking", "name": "Anthropic: Claude 3.7 Sonnet (thinking)", "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1740422110 } }, { "id": "anthropic/claude-haiku-4.5", "name": "Anthropic: Claude Haiku 4.5", "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line, enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760547638 } }, { "id": "anthropic/claude-opus-4", "name": "Anthropic: Claude Opus 4", "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows.
It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747931245 } }, { "id": "anthropic/claude-opus-4.1", "name": "Anthropic: Claude Opus 4.1", "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754411591 } }, { "id": "anthropic/claude-opus-4.5", "name": "Anthropic: Claude Opus 4.5", "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1764010580 } }, { "id": "anthropic/claude-sonnet-4", "name": "Anthropic: Claude Sonnet 4", "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. 
Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747930371 } }, { "id": "anthropic/claude-sonnet-4.5", "name": "Anthropic: Claude Sonnet 4.5", "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759161676 } }, { "id": "arcee-ai/coder-large", "name": "Arcee AI: Coder Large", "description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file refactoring or long diff review in a single call, and understands 30‑plus programming languages with special attention to TypeScript, Go and Terraform. Internal benchmarks show 5–8 pt gains over CodeLlama‑34 B‑Python on HumanEval and competitive BugFix scores thanks to a reinforcement pass that rewards compilable output. The model emits structured explanations alongside code blocks by default, making it suitable for educational tooling as well as production copilot scenarios. Cost‑wise, Together AI prices it well below proprietary incumbents, so teams can scale interactive coding without runaway spend. 
", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.7999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746478663 } }, { "id": "arcee-ai/maestro-reasoning", "name": "Arcee AI: Maestro Reasoning", "description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B preview, the production 32 B release widens the context window to 128 k tokens and doubles pass‑rate on MATH and GSM‑8K, while also lifting code completion accuracy. Its instruction style encourages structured \"thought → answer\" traces that can be parsed or hidden according to user preference. That transparency pairs well with audit‑focused industries like finance or healthcare where seeing the reasoning path matters. In Arcee Conductor, Maestro is automatically selected for complex, multi‑constraint queries that smaller SLMs bounce. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.8999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 3.3000000000000003, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746481269 } }, { "id": "arcee-ai/spotlight", "name": "Arcee AI: Spotlight", "description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal conversations that combine lengthy documents with one or more images. Training emphasized fast inference on consumer GPUs while retaining strong captioning, visual‐question‑answering, and diagram‑analysis accuracy. As a result, Spotlight slots neatly into agent workflows where screenshots, charts or UI mock‑ups need to be interpreted on the fly. Early benchmarks show it matching or out‑scoring larger VLMs such as LLaVA‑1.6 13 B on popular VQA and POPE alignment tests. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746481552 } }, { "id": "arcee-ai/virtuoso-large", "name": "Arcee AI: Virtuoso Large", "description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k context inherited from Qwen 2.5, letting it ingest books, codebases or financial filings wholesale. Training blended DeepSeek R1 distillation, multi‑epoch supervised fine‑tuning and a final DPO/RLHF alignment stage, yielding strong performance on BIG‑Bench‑Hard, GSM‑8K and long‑context Needle‑In‑Haystack tests. Enterprises use Virtuoso‑Large as the \"fallback\" brain in Conductor pipelines when other SLMs flag low confidence. 
Despite its size, aggressive KV‑cache optimizations keep first‑token latency in the low‑second range on 8× H100 nodes, making it a practical production‑grade powerhouse.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.75, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746478885 } }, { "id": "arliai/qwq-32b-arliai-rpr-v1", "name": "ArliAI: QwQ 32B RpR v1", "description": "QwQ-32B-ArliAI-RpR-v1 is a 32B parameter model fine-tuned from Qwen/QwQ-32B using a curated creative writing and roleplay dataset originally developed for the RPMax series. It is designed to maintain coherence and reasoning across long multi-turn conversations by introducing explicit reasoning steps per dialogue turn, generated and refined using the base model itself.\n\nThe model was trained using RS-QLORA+ on 8K sequence lengths and supports up to 128K context windows (with practical performance around 32K). It is optimized for creative roleplay and dialogue generation, with an emphasis on minimizing cross-context repetition while preserving stylistic diversity.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744555982 } }, { "id": "baidu/ernie-4.5-21b-a3b", "name": "Baidu: ERNIE 4.5 21B A3B", "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. 
Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 120000, "pricing": { "input": { "per_million_tokens": 0.056, "currency": "USD" }, "output": { "per_million_tokens": 0.224, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755034167 } }, { "id": "baidu/ernie-4.5-21b-a3b-thinking", "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.056, "currency": "USD" }, "output": { "per_million_tokens": 0.224, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760048887 } }, { "id": "baidu/ernie-4.5-vl-28b-a3b", "name": "Baidu: ERNIE 4.5 VL 28B A3B", "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 30000, "pricing": { "input": { "per_million_tokens": 0.112, "currency": "USD" }, "output": { "per_million_tokens": 0.448, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755032836 } }, { "id": "baidu/ernie-4.5-vl-424b-a47b", "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. 
Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 123000, "pricing": { "input": { "per_million_tokens": 0.336, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1751300903 } }, { "id": "bytedance/doubao-embedding-large-text-240915", "name": "ByteDance: Doubao Embedding Large Text (240915)", "description": "Doubao Embedding Large is the latest upgrade of ByteDance's semantic embedding model. Built on the Doubao language model as its base, it offers strong language understanding and is aimed primarily at vector-retrieval use cases, supporting both Chinese and English.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4000, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "bytedance/doubao-embedding-text-240715", "name": "ByteDance: Doubao Embedding", "description": "A semantic embedding model developed by ByteDance, aimed primarily at vector-retrieval use cases. It supports both Chinese and English with a context length of up to 4K tokens. Embeddings are 2048-dimensional and can be reduced to 512 or 1024 dimensions.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4000, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "bytedance/doubao-embedding-vision-241215", "name": "ByteDance: Doubao Embedding Vision", "description": "Doubao-embedding-vision is a newly upgraded image-text multimodal embedding model, aimed primarily at image-text multimodal vector-retrieval use cases. It supports image input as well as Chinese and English text input, with a context length of up to 8K tokens.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "bytedance/doubao-embedding-vision-250328", "name": "ByteDance: Doubao Embedding Vision", "description": "Doubao-embedding-vision is a newly upgraded image-text multimodal embedding model, aimed primarily at image-text multimodal vector-retrieval use cases. It supports image input as well as Chinese and English text input, with a context length of up to 8K tokens.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "bytedance/doubao-seed-1.6", "name": "ByteDance: Doubao Seed 1.6", "description": "A new multimodal deep-thinking model that supports three thinking modes: thinking, non-thinking, and auto. In non-thinking mode it is a substantial improvement over the doubao-1.5-pro-32k-250115 model.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 1200000, "currency": "USD" }, "output": { "per_million_tokens": 16000000, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738402289 } }, { "id": "bytedance/doubao-seed-1.6-flash", "name": "ByteDance: Doubao Seed 1.6 Flash", "description": "A multimodal deep-thinking model with extremely fast inference that supports both text and visual understanding. Its text understanding surpasses the previous-generation Lite series, and its visual understanding is on par with competitors' Pro-series models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 300000, "currency": "USD" }, "output": { "per_million_tokens": 3000000, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738402289 } }, { "id": "bytedance/doubao-seed-1.6-thinking", "name": "ByteDance: Doubao Seed 1.6 Thinking", "description": "Substantially strengthened thinking capabilities: compared with the doubao 1.5 generation of deep-understanding models, it further improves core abilities such as coding, math, and logical reasoning, and supports visual understanding.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 1200000, "currency": "USD" }, "output": { "per_million_tokens": 16000000, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738402289 } }, { "id": 
"bytedance/ui-tars-1.5-7b", "name": "ByteDance: UI-TARS 7B ", "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753205056 } }, { "id": "cohere/command-a", "name": "Cohere: Command A", "description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases.\nCompared to other leading proprietary and open-weights models Command A delivers maximum performance with minimum hardware costs, excelling on business-critical agentic and multilingual tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741894342 } }, { "id": "cohere/command-r-08-2024", "name": "Cohere: Command R (08-2024)", "description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. 
More broadly, it is better at math, code and reasoning and is competitive with the previous version of the larger Command R+ model.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724976000 } }, { "id": "cohere/command-r-plus-08-2024", "name": "Cohere: Command R+ (08-2024)", "description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same.\n\nRead the launch post [here](https://docs.cohere.com/changelog/command-gets-refreshed).\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724976000 } }, { "id": "cohere/command-r7b-12-2024", "name": "Cohere: Command R7B (12-2024)", "description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps.\n\nUse of this model is subject to Cohere's [Usage Policy](https://docs.cohere.com/docs/usage-policy) and [SaaS Agreement](https://cohere.com/saas-agreement).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.0375, "currency": "USD" }, "output": { "per_million_tokens": 0.15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1734158152 } }, { "id": "deepcogito/cogito-v2-preview-deepseek-671b", "name": "Deep Cogito: Cogito V2 Preview Deepseek 671B", "description": "Cogito v2 is a multilingual, instruction-tuned Mixture of Experts (MoE) large language model with 671 billion parameters. It supports both standard and reasoning-based generation modes. The model introduces hybrid reasoning via Iterated Distillation and Amplification (IDA)—an iterative self-improvement strategy designed to scale alignment with general intelligence. Cogito v2 has been optimized for STEM, programming, instruction following, and tool use. It supports 128k context length and offers strong performance in both multilingual and code-heavy environments. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756830949 } }, { "id": "deepcogito/cogito-v2-preview-llama-109b-moe", "name": "Cogito V2 Preview Llama 109B", "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E. Cogito v2 can answer directly or engage an extended “thinking” phase, with alignment guided by Iterated Distillation & Amplification (IDA). It targets coding, STEM, instruction following, and general helpfulness, with stronger multilingual, tool-calling, and reasoning performance than size-equivalent baselines. The model supports long-context use (up to 10M tokens) and standard Transformers workflows. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32767, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.59, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756831568 } }, { "id": "deepcogito/cogito-v2-preview-llama-405b", "name": "Deep Cogito: Cogito V2 Preview Llama 405B", "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. It represents a significant step toward frontier intelligence with dense architecture delivering performance competitive with leading closed models. This advanced reasoning system combines policy improvement with massive scale for exceptional capabilities.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 3.5, "currency": "USD" }, "output": { "per_million_tokens": 3.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760709933 } }, { "id": "deepcogito/cogito-v2-preview-llama-70b", "name": "Deep Cogito: Cogito V2 Preview Llama 70B", "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. Built with iterative policy improvement, it delivers strong performance across reasoning tasks while maintaining efficiency through shorter reasoning chains and improved intuition.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.88, "currency": "USD" }, "output": { "per_million_tokens": 0.88, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756831784 } }, { "id": "deepcogito/cogito-v2.1-671b", "name": "Deep Cogito: Cogito v2.1 671B", "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. 
This model is trained using self-play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries, and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763071233 } }, { "id": "deepseek/deepseek-chat", "name": "DeepSeek: DeepSeek V3", "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1735241320 } }, { "id": "deepseek/deepseek-chat-v3-0324", "name": "DeepSeek: DeepSeek V3 0324", "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.88, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1742824755 } }, { "id": "deepseek/deepseek-chat-v3.1", "name": "DeepSeek: DeepSeek V3.1", "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
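Several entries in this catalog (Cogito v2, DeepSeek V3.1 and its successors) note that thinking can be toggled with the `reasoning` `enabled` boolean. A minimal sketch of what that looks like as a chat-completion request, following the OpenRouter reasoning docs linked in those descriptions; the API key and prompt are placeholders:

```python
import requests

# Minimal sketch: toggle hybrid thinking via the `reasoning.enabled`
# boolean described in the entries above. OPENROUTER_API_KEY is a placeholder.
resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer OPENROUTER_API_KEY"},
    json={
        "model": "deepseek/deepseek-chat-v3.1",
        "messages": [{"role": "user", "content": "Why is the sky blue?"}],
        "reasoning": {"enabled": True},  # set to False for non-thinking mode
    },
)
print(resp.json()["choices"][0]["message"]["content"])
```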
\n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755779628 } }, { "id": "deepseek/deepseek-prover-v2", "name": "DeepSeek: DeepSeek Prover V2", "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL). Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 2.18, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746013094 } }, { "id": "deepseek/deepseek-r1", "name": "DeepSeek: R1", "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1737381095 } }, { "id": "deepseek/deepseek-r1-0528", "name": "DeepSeek: R1 0528", "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1). Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. 
It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 4.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1748455170 } }, { "id": "deepseek/deepseek-r1-0528-qwen3-8b", "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, bringing its reasoning and inference close to flagship models like O3 and Gemini 2.5 Pro.\nIt now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.\nThe distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235B “thinking” giant on AIME 2024.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1748538543 } }, { "id": "deepseek/deepseek-r1-distill-llama-70b", "name": "DeepSeek: R1 Distill Llama 70B", "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1737663169 } }, { "id": "deepseek/deepseek-r1-distill-qwen-14b", "name": "DeepSeek: R1 Distill Qwen 14B", "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). 
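With this many R1 variants and distills at different price points, it is often easier to query the catalog than to eyeball it. A minimal sketch, assuming this document has been saved locally as `models.json` (the filename and thresholds are arbitrary):

```python
import json

# Assumes this catalog was saved as models.json; the filename is arbitrary.
with open("models.json") as f:
    catalog = json.load(f)

# List models with at least 128K context and input priced at <= $0.30/M tokens.
# Some entries (e.g. the embedding models) have no pricing, hence the .get chain.
for model in catalog["models"]:
    rate = model.get("pricing", {}).get("input", {}).get("per_million_tokens")
    if rate is not None and rate <= 0.3 and model.get("context_window", 0) >= 131072:
        print(f'{model["id"]}: ${rate}/M input, {model["context_window"]}-token context')
```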
It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738193940 } }, { "id": "deepseek/deepseek-r1-distill-qwen-32b", "name": "DeepSeek: R1 Distill Qwen 32B", "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 72.6\n- MATH-500 pass@1: 94.3\n- CodeForces Rating: 1691\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 64000, "pricing": { "input": { "per_million_tokens": 0.24, "currency": "USD" }, "output": { "per_million_tokens": 0.24, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738194830 } }, { "id": "deepseek/deepseek-v3.1-terminus", "name": "DeepSeek: DeepSeek V3.1 Terminus", "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. 
", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.216, "currency": "USD" }, "output": { "per_million_tokens": 0.7999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758548275 } }, { "id": "deepseek/deepseek-v3.1-terminus:exacto", "name": "DeepSeek: DeepSeek V3.1 Terminus (exacto)", "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.216, "currency": "USD" }, "output": { "per_million_tokens": 0.7999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758548275 } }, { "id": "deepseek/deepseek-v3.2-exp", "name": "DeepSeek: DeepSeek V3.2 Exp", "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.216, "currency": "USD" }, "output": { "per_million_tokens": 0.328, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759150481 } }, { "id": "eleutherai/llemma_7b", "name": "EleutherAI: Llemma 7b", "description": "Llemma 7B is a language model for mathematics. 
It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744643225 } }, { "id": "google/gemini-2.0-flash-001", "name": "Google: Gemini 2.0 Flash", "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738769413 } }, { "id": "google/gemini-2.0-flash-lite-001", "name": "Google: Gemini 2.0 Flash Lite", "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.075, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1740506212 } }, { "id": "google/gemini-2.5-flash", "name": "Google: Gemini 2.5 Flash", "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750172488 } }, { "id": "google/gemini-2.5-flash-image", "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. 
Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759870431 } }, { "id": "google/gemini-2.5-flash-image-preview", "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", "description": "Gemini 2.5 Flash Image Preview, a.k.a. \"Nano Banana,\" is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756218977 } }, { "id": "google/gemini-2.5-flash-lite", "name": "Google: Gemini 2.5 Flash Lite", "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753200276 } }, { "id": "google/gemini-2.5-flash-lite-preview-09-2025", "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758819686 } }, { "id": "google/gemini-2.5-flash-preview-09-2025", "name": "Google: Gemini 2.5 Flash Preview 09-2025", "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. 
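The `image_config` parameter linked in the Nano Banana entry above is how aspect ratio is selected for image output. A minimal sketch of such a request; the `modalities` field and the exact `aspect_ratio` shape follow the linked OpenRouter image-generation docs, but treat the specifics as assumptions, and the API key is a placeholder:

```python
import requests

# Minimal sketch of an image-generation request. The image_config /
# aspect_ratio shape follows the docs linked above (an assumption here),
# and OPENROUTER_API_KEY is a placeholder.
resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer OPENROUTER_API_KEY"},
    json={
        "model": "google/gemini-2.5-flash-image",
        "messages": [{"role": "user", "content": "A watercolor heron at dawn"}],
        "modalities": ["image", "text"],           # ask for image output
        "image_config": {"aspect_ratio": "16:9"},  # aspect ratio per the docs
    },
)
```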
It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758820178 } }, { "id": "google/gemini-2.5-pro", "name": "Google: Gemini 2.5 Pro", "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750169544 } }, { "id": "google/gemini-2.5-pro-preview", "name": "Google: Gemini 2.5 Pro Preview 06-05", "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749137257 } }, { "id": "google/gemini-2.5-pro-preview-05-06", "name": "Google: Gemini 2.5 Pro Preview 05-06", "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. 
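The "max tokens for reasoning" parameter that the Gemini 2.5 Flash entries point to caps how much of the response budget is spent thinking; per the linked reasoning-tokens docs it is passed as `reasoning.max_tokens`. A minimal sketch (the budget value is arbitrary and the API key is a placeholder):

```python
import requests

# Minimal sketch: cap Gemini 2.5 Flash's thinking budget via
# reasoning.max_tokens, per the docs linked above. The 2048 budget is arbitrary.
resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer OPENROUTER_API_KEY"},  # placeholder key
    json={
        "model": "google/gemini-2.5-flash",
        "messages": [{"role": "user", "content": "Plan a three-day Kyoto itinerary."}],
        "reasoning": {"max_tokens": 2048},
    },
)
```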
Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746578513 } }, { "id": "google/gemini-3-pro-image-preview", "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763653797 } }, { "id": "google/gemini-3-pro-preview", "name": "Google: Gemini 3 Pro Preview", "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. 
Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 12, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763474668 } }, { "id": "google/gemma-2-9b-it", "name": "Google: Gemma 2 9B", "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.09, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1719532800 } }, { "id": "google/gemma-3-12b-it", "name": "Google: Gemma 3 12B", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](/google/gemma-3-27b-it)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741902625 } }, { "id": "google/gemma-3-4b-it", "name": "Google: Gemma 3 4B", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 96000, "pricing": { "input": { "per_million_tokens": 0.01703012, "currency": "USD" }, "output": { "per_million_tokens": 0.0681536, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741905510 } }, { "id": "google/gemma-3n-e4b-it", "name": "Google: Gemma 3n 4B", "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. 
Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.04, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747776824 } }, { "id": "ibm-granite/granite-4.0-h-micro", "name": "IBM: Granite 4.0 Micro", "description": "Granite-4.0-H-Micro is a 3B-parameter model from the Granite 4 family, the latest in a series of models released by IBM. It is fine-tuned for long-context tool calling. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "pricing": { "input": { "per_million_tokens": 0.017, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760927695 } }, { "id": "inception/mercury", "name": "Inception: Mercury", "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed-optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post](https://www.inceptionlabs.ai/blog/introducing-mercury). ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750973026 } }, { "id": "inception/mercury-coder", "name": "Inception: Mercury Coder", "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed-optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. 
Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746033880 } }, { "id": "inflection/inflection-3-pi", "name": "Inflection: Inflection 3 Pi", "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style; if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1728604800 } }, { "id": "inflection/inflection-3-productivity", "name": "Inflection: Inflection 3 Productivity", "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflection 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1728604800 } }, { "id": "liquid/lfm-2.2-6b", "name": "LiquidAI/LFM2-2.6B", "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760970889 } }, { "id": "liquid/lfm2-8b-a1b", "name": "LiquidAI/LFM2-8B-A1B", "description": "Model created via inbox interface", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760970984 } }, { "id": "mancer/weaver", "name": "Mancer: Weaver (alpha)", "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. 
Meant for use in roleplay/narrative situations.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8000, "pricing": { "input": { "per_million_tokens": 1.125, "currency": "USD" }, "output": { "per_million_tokens": 1.125, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1690934400 } }, { "id": "meituan/longcat-flash-chat", "name": "Meituan: LongCat Flash Chat", "description": "LongCat-Flash-Chat is a large-scale Mixture-of-Experts (MoE) model with 560B total parameters, of which 18.6B–31.3B (≈27B on average) are dynamically activated per input. It introduces a shortcut-connected MoE design to reduce communication overhead and achieve high throughput while maintaining training stability through advanced scaling strategies such as hyperparameter transfer, deterministic computation, and multi-stage optimization.\n\nThis release, LongCat-Flash-Chat, is a non-thinking foundation model optimized for conversational and agentic tasks. It supports long context windows up to 128K tokens and shows competitive performance across reasoning, coding, instruction following, and domain benchmarks, with particular strengths in tool use and complex multi-step interactions.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757427658 } }, { "id": "meta-llama/llama-3-70b-instruct", "name": "Meta: Llama 3 70B Instruct", "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1713398400 } }, { "id": "meta-llama/llama-3-8b-instruct", "name": "Meta: Llama 3 8B Instruct", "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1713398400 } }, { "id": "meta-llama/llama-3.1-405b", "name": "Meta: Llama 3.1 405B (base)", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. 
This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1722556800 } }, { "id": "meta-llama/llama-3.1-405b-instruct", "name": "Meta: Llama 3.1 405B Instruct", "description": "The highly anticipated 400B class of Llama 3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 130815, "pricing": { "input": { "per_million_tokens": 3.5, "currency": "USD" }, "output": { "per_million_tokens": 3.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721692800 } }, { "id": "meta-llama/llama-3.1-70b-instruct", "name": "Meta: Llama 3.1 70B Instruct", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721692800 } }, { "id": "meta-llama/llama-3.1-8b-instruct", "name": "Meta: Llama 3.1 8B Instruct", "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). 
Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.03, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721692800 } }, { "id": "meta-llama/llama-3.2-11b-vision-instruct", "name": "Meta: Llama 3.2 11B Vision Instruct", "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.049, "currency": "USD" }, "output": { "per_million_tokens": 0.049, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1727222400 } }, { "id": "meta-llama/llama-3.2-1b-instruct", "name": "Meta: Llama 3.2 1B Instruct", "description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate efficiently in low-resource environments while maintaining strong task performance.\n\nSupporting eight core languages and fine-tunable for more, Llama 3.2 1B is ideal for businesses or developers seeking lightweight yet powerful AI solutions that can operate in diverse multilingual settings without the high computational demand of larger models.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 60000, "pricing": { "input": { "per_million_tokens": 0.027, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1727222400 } }, { "id": "meta-llama/llama-3.2-3b-instruct", "name": "Meta: Llama 3.2 3B Instruct", "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. 
Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.02, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1727222400 } }, { "id": "meta-llama/llama-3.3-70b-instruct", "name": "Meta: Llama 3.3 70B Instruct", "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.10400000000000001, "currency": "USD" }, "output": { "per_million_tokens": 0.312, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1733506137 } }, { "id": "meta-llama/llama-4-maverick", "name": "Meta: Llama 4 Maverick", "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.136, "currency": "USD" }, "output": { "per_million_tokens": 0.6799999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1743881822 } }, { "id": "meta-llama/llama-4-scout", "name": "Meta: Llama 4 Scout", "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. 
Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 327680, "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1743881519 } }, { "id": "meta-llama/llama-guard-2-8b", "name": "Meta: LlamaGuard 2 8B", "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like its predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1715558400 } }, { "id": "meta-llama/llama-guard-3-8b", "name": "Llama Guard 3 8B", "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.06, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1739401318 } }, { "id": "meta-llama/llama-guard-4-12b", "name": "Meta: Llama Guard 4 12B", "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. 
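The LlamaGuard entries above recommend raw prompt input via the `/completions` endpoint rather than the chat API. A hedged sketch of what that could look like against an OpenAI-compatible completions route; the base URL and the guard prompt template here are assumptions, so check your provider's docs for the exact template your deployment expects:

```python
import os
import requests

# Hypothetical OpenAI-compatible /completions endpoint; swap in your provider's URL.
resp = requests.post(
    "https://api.example.com/v1/completions",
    headers={"Authorization": f"Bearer {os.environ['API_KEY']}"},
    json={
        "model": "meta-llama/llama-guard-2-8b",
        # Illustrative guard prompt; the real template is provider/model specific.
        "prompt": "[INST] Task: Check if there is unsafe content in the "
                  "following message.\n\nUser: How do I bake a cake? [/INST]",
        "max_tokens": 32,
    },
)
# The guard replies with "safe", or "unsafe" followed by the violated
# category codes, per the description above.
print(resp.json()["choices"][0]["text"].strip())
```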
Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1745975193 } }, { "id": "microsoft/mai-ds-r1", "name": "Microsoft: MAI DS R1", "description": "MAI-DS-R1 is a post-trained variant of DeepSeek-R1 developed by the Microsoft AI team to improve the model’s responsiveness on previously blocked topics while enhancing its safety profile. Built on top of DeepSeek-R1’s reasoning foundation, it integrates 110k examples from the Tulu-3 SFT dataset and 350k internally curated multilingual safety-alignment samples. The model retains strong reasoning, coding, and problem-solving capabilities, while unblocking a wide range of prompts previously restricted in R1.\n\nMAI-DS-R1 demonstrates improved performance on harm mitigation benchmarks and maintains competitive results across general reasoning tasks. It surpasses R1-1776 in satisfaction metrics for blocked queries and reduces leakage in harmful content categories. The model is based on a transformer MoE architecture and is suitable for general-purpose use cases, excluding high-stakes domains such as legal, medical, or autonomous systems.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1745194100 } }, { "id": "microsoft/phi-3-medium-128k-instruct", "name": "Microsoft: Phi-3 Medium 128K Instruct", "description": "Phi-3 128K Medium is a powerful 14-billion parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Medium demonstrated state-of-the-art performance among lightweight models. 
In the MMLU-Pro eval, the model even comes close to a Llama3 70B level of performance.\n\nFor 4k context length, try [Phi-3 Medium 4K](/models/microsoft/phi-3-medium-4k-instruct).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1716508800 } }, { "id": "microsoft/phi-3-mini-128k-instruct", "name": "Microsoft: Phi-3 Mini 128K Instruct", "description": "Phi-3 Mini is a powerful 3.8B parameter model designed for advanced language understanding, reasoning, and instruction following. Optimized through supervised fine-tuning and preference adjustments, it excels in tasks involving common sense, mathematics, logical reasoning, and code processing.\n\nAt time of release, Phi-3 Mini demonstrated state-of-the-art performance among lightweight models. This model is static, trained on an offline dataset with an October 2023 cutoff date.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1716681600 } }, { "id": "microsoft/phi-3.5-mini-128k-instruct", "name": "Microsoft: Phi-3.5 Mini 128K Instruct", "description": "Phi-3.5 models are lightweight, state-of-the-art open models. These models were trained with Phi-3 datasets that include both synthetic data and filtered, publicly available website data, with a focus on high-quality and reasoning-dense properties. Phi-3.5 Mini uses 3.8B parameters, and is a dense decoder-only transformer model using the same tokenizer as [Phi-3 Mini](/models/microsoft/phi-3-mini-128k-instruct).\n\nThe models underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks that test common sense, language understanding, math, code, long context and logical reasoning, Phi-3.5 models showcased robust and state-of-the-art performance among models with less than 13 billion parameters.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724198400 } }, { "id": "microsoft/phi-4", "name": "Microsoft: Phi 4", "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. 
It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1736489872 } }, { "id": "microsoft/phi-4-multimodal-instruct", "name": "Microsoft: Phi 4 Multimodal Instruct", "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate text outputs. The unified architecture enables efficient, low-latency inference, suitable for edge and mobile deployments. Phi-4 Multimodal Instruct supports text inputs in multiple languages including Arabic, Chinese, English, French, German, Japanese, Spanish, and more, with visual input optimized primarily for English. It delivers impressive performance on multimodal tasks involving mathematical, scientific, and document reasoning, providing developers and enterprises a powerful yet compact model for sophisticated interactive applications. For more information, see the [Phi-4 Multimodal blog post](https://azure.microsoft.com/en-us/blog/empowering-innovation-the-next-generation-of-the-phi-family/).\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741396284 } }, { "id": "microsoft/phi-4-reasoning-plus", "name": "Microsoft: Phi 4 Reasoning Plus", "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\n\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.35, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746130961 } }, { "id": "microsoft/wizardlm-2-8x22b", "name": "WizardLM-2 8x22B", "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. 
It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art open-source models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.48, "currency": "USD" }, "output": { "per_million_tokens": 0.48, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1713225600 } }, { "id": "minimax/minimax-01", "name": "MiniMax: MiniMax-01", "description": "MiniMax-01 combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000192, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 1.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1736915462 } }, { "id": "minimax/minimax-m1", "name": "MiniMax: MiniMax M1", "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750200414 } }, { "id": "minimax/minimax-m2", "name": "MiniMax: MiniMax M2", "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. 
It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 204800, "pricing": { "input": { "per_million_tokens": 0.255, "currency": "USD" }, "output": { "per_million_tokens": 1.02, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761252093 } }, { "id": "mistralai/codestral-2501", "name": "Mistral: Codestral 2501", "description": "[Mistral](/mistralai)'s cutting-edge language model for coding. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation. \n\nLearn more on their blog post: https://mistral.ai/news/codestral-2501/", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.8999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1736895522 } }, { "id": "mistralai/codestral-2508", "name": "Mistral: Codestral 2508", "description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.8999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754079630 } }, { "id": "mistralai/devstral-medium", "name": "Mistral: Devstral Medium", "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. 
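MiniMax's note above about preserving reasoning between turns amounts to echoing the assistant's `reasoning_details` back on the next request instead of dropping it. A minimal sketch, assuming the OpenRouter chat-completions route and the response shape described in the linked docs:

```python
import os
import requests

messages = [{"role": "user", "content": "Plan the refactor step by step."}]
r = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json={"model": "minimax/minimax-m2", "messages": messages},
).json()

assistant = r["choices"][0]["message"]
# Keep reasoning_details on the assistant turn so the model's prior reasoning
# carries into the follow-up request rather than being discarded.
messages.append({
    "role": "assistant",
    "content": assistant["content"],
    "reasoning_details": assistant.get("reasoning_details"),
})
messages.append({"role": "user", "content": "Now apply step 1."})
# ...send the extended messages list in the next request as usual.
```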
It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1752161321 } }, { "id": "mistralai/devstral-small", "name": "Mistral: Devstral Small 1.1", "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.28, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1752160751 } }, { "id": "mistralai/devstral-small-2505", "name": "Mistral: Devstral Small 2505", "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.12, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747837379 } }, { "id": "mistralai/magistral-medium-2506", "name": "Mistral: Magistral Medium 2506", "description": "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. 
From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749354054 } }, { "id": "mistralai/magistral-medium-2506:thinking", "name": "Mistral: Magistral Medium 2506 (thinking)", "description": "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749354054 } }, { "id": "mistralai/magistral-small-2506", "name": "Mistral: Magistral Small 2506", "description": "Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via reinforcement learning. It is optimized for reasoning and supports a wide multilingual range, including over 20 languages.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40000, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749569561 } }, { "id": "mistralai/ministral-3b", "name": "Mistral: Ministral 3B", "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.04, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1729123200 } }, { "id": "mistralai/ministral-8b", "name": "Mistral: Ministral 8B", "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. 
It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1729123200 } }, { "id": "mistralai/mistral-7b-instruct", "name": "Mistral: Mistral 7B Instruct", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.028, "currency": "USD" }, "output": { "per_million_tokens": 0.054, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1716768000 } }, { "id": "mistralai/mistral-7b-instruct-v0.1", "name": "Mistral: Mistral 7B Instruct v0.1", "description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2824, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.19, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1695859200 } }, { "id": "mistralai/mistral-7b-instruct-v0.2", "name": "Mistral: Mistral 7B Instruct v0.2", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/models/mistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1703721600 } }, { "id": "mistralai/mistral-7b-instruct-v0.3", "name": "Mistral: Mistral 7B Instruct v0.3", "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1716768000 } }, { "id": "mistralai/mistral-large", "name": "Mistral Large", "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. 
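Several of the Mistral entries above note function-calling support with the caveat that it depends on the provider (see the v0.3 note). A hedged sketch of a standard OpenAI-compatible tools request; the endpoint and the `get_weather` tool are illustrative, and whether `tool_calls` comes back is up to the serving provider:

```python
import os
import requests

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool for illustration
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]
r = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json={
        "model": "mistralai/mistral-7b-instruct-v0.3",
        "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
        "tools": tools,
    },
).json()
# If the provider honors tools, the reply carries tool_calls to execute.
print(r["choices"][0]["message"].get("tool_calls"))
```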
Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1708905600 } }, { "id": "mistralai/mistral-large-2407", "name": "Mistral Large 2407", "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731978415 } }, { "id": "mistralai/mistral-large-2411", "name": "Mistral Large 2411", "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731978685 } }, { "id": "mistralai/mistral-medium-3", "name": "Mistral: Mistral Medium 3", "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. 
Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746627341 } }, { "id": "mistralai/mistral-medium-3.1", "name": "Mistral: Mistral Medium 3.1", "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755095639 } }, { "id": "mistralai/mistral-nemo", "name": "Mistral: Mistral Nemo", "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.02, "currency": "USD" }, "output": { "per_million_tokens": 0.04, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721347200 } }, { "id": "mistralai/mistral-saba", "name": "Mistral: Saba", "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. 
Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1739803239 } }, { "id": "mistralai/mistral-small", "name": "Mistral Small", "description": "With 22 billion parameters, Mistral Small v24.09 offers a convenient mid-point between [Mistral NeMo 12B](/mistralai/mistral-nemo) and [Mistral Large 2](/mistralai/mistral-large), providing a cost-effective solution that can be deployed across various platforms and environments. It has better reasoning, exhibits more capabilities, can produce and reason about code, and is multilingual, supporting English, French, German, Italian, and Spanish.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1704844800 } }, { "id": "mistralai/mistral-small-24b-instruct-2501", "name": "Mistral: Mistral Small 3", "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738255409 } }, { "id": "mistralai/mistral-small-3.1-24b-instruct", "name": "Mistral: Mistral Small 3.1 24B", "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. 
The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1742238937 } }, { "id": "mistralai/mistral-small-3.2-24b-instruct", "name": "Mistral: Mistral Small 3.2 24B", "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.18, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750443016 } }, { "id": "mistralai/mistral-tiny", "name": "Mistral Tiny", "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistralai/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1704844800 } }, { "id": "mistralai/mixtral-8x22b-instruct", "name": "Mistral: Mixtral 8x22B Instruct", "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1713312000 } }, { "id": "mistralai/mixtral-8x7b-instruct", "name": "Mistral: Mixtral 8x7B Instruct", "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. 
#moe", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.54, "currency": "USD" }, "output": { "per_million_tokens": 0.54, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1702166400 } }, { "id": "mistralai/pixtral-12b", "name": "Mistral: Pixtral 12B", "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.09999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1725926400 } }, { "id": "mistralai/pixtral-large-2411", "name": "Mistral: Pixtral Large 2411", "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731977388 } }, { "id": "mistralai/voxtral-small-24b-2507", "name": "Mistral: Voxtral Small 24B 2507", "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761835144 } }, { "id": "moonshotai/kimi-k2", "name": "MoonshotAI: Kimi K2 0711", "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. 
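Voxtral's audio input above is priced per million seconds rather than per million tokens. A quick worked example of what that rate implies:

```python
# $100 per million seconds of input audio works out to $0.0001 per second.
AUDIO_RATE_PER_SECOND = 100 / 1_000_000  # USD

clip_seconds = 90
print(clip_seconds * AUDIO_RATE_PER_SECOND)  # 0.009 -> a 90s clip costs ~$0.009
```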
It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.456, "currency": "USD" }, "output": { "per_million_tokens": 1.8399999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1752263252 } }, { "id": "moonshotai/kimi-k2-0905", "name": "MoonshotAI: Kimi K2 0905", "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.39, "currency": "USD" }, "output": { "per_million_tokens": 1.9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757021147 } }, { "id": "moonshotai/kimi-k2-0905:exacto", "name": "MoonshotAI: Kimi K2 0905 (exacto)", "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 2.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757021147 } }, { "id": "moonshotai/kimi-k2-thinking", "name": "MoonshotAI: Kimi K2 Thinking", "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. 
The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.44999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 2.35, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1762440622 } }, { "id": "moonshotai/kimi-linear-48b-a3b-instruct", "name": "MoonshotAI: Kimi Linear 48B A3B Instruct", "description": "Kimi Linear is a hybrid linear attention architecture that outperforms traditional full attention methods across various contexts, including short, long, and reinforcement learning (RL) scaling regimes. At its core is Kimi Delta Attention (KDA)—a refined version of Gated DeltaNet that introduces a more efficient gating mechanism to optimize the use of finite-state RNN memory.\n\nKimi Linear achieves superior performance and hardware efficiency, especially for long-context tasks. It reduces the need for large KV caches by up to 75% and boosts decoding throughput by up to 6x for contexts as long as 1M tokens.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1762565833 } }, { "id": "morph/morph-v3-fast", "name": "Morph: Morph V3 Fast", "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 81920, "pricing": { "input": { "per_million_tokens": 0.7999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1751910002 } }, { "id": "morph/morph-v3-large", "name": "Morph: Morph V3 Large", "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. 
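The Morph entries spell out a three-part prompt layout: the instruction, the initial code, and the edit snippet, in that order. A hedged sketch of assembling and sending such a prompt; the plain newline-separated layout mirrors the placeholders shown above, but Morph's linked documentation is authoritative on the exact delimiters:

```python
import os
import requests

instruction = "Add a docstring to greet()."
initial_code = "def greet(name):\n    return f'Hello, {name}!'"
# Apply models accept an abbreviated edit; the marker below is illustrative.
edit_snippet = 'def greet(name):\n    """Return a greeting for name."""\n    # ... existing code ...'

prompt = f"{instruction}\n{initial_code}\n{edit_snippet}"
r = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json={
        "model": "morph/morph-v3-fast",
        "messages": [{"role": "user", "content": prompt}],
    },
).json()
print(r["choices"][0]["message"]["content"])  # the merged, edited file
```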
Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.8999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 1.9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1751910858 } }, { "id": "neversleep/llama-3.1-lumimaid-8b", "name": "NeverSleep: Lumimaid v0.2 8B", "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chat outputs were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.09, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1726358400 } }, { "id": "neversleep/noromaid-20b", "name": "Noromaid 20B", "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1.75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1700956800 } }, { "id": "nousresearch/deephermes-3-mistral-24b-preview", "name": "Nous: DeepHermes 3 Mistral 24B Preview", "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning. It introduces a dual-mode system that toggles between intuitive chat responses and structured “deep reasoning” mode using special system prompts. Fine-tuned via distillation from R1, it supports structured output (JSON mode) and function call syntax for agent-based applications.\n\nDeepHermes 3 supports a **reasoning toggle via system prompt**, allowing users to switch between fast, intuitive responses and deliberate, multi-step reasoning. When activated with the following specific system instruction, the model enters a *\"deep thinking\"* mode—generating extended chains of thought wrapped in `<think>` tags before delivering a final answer. \n\nSystem Prompt: You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. 
You should enclose your thoughts and internal monologue inside <think></think> tags, and then provide your solution or response to the problem.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1746830904 } }, { "id": "nousresearch/hermes-2-pro-llama-3-8b", "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.024999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.08, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1716768000 } }, { "id": "nousresearch/hermes-3-llama-3.1-405b", "name": "Nous: Hermes 3 405B Instruct", "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1723766400 } }, { "id": "nousresearch/hermes-3-llama-3.1-70b", "name": "Nous: Hermes 3 70B Instruct", "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.3,
"currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1723939200 } }, { "id": "nousresearch/hermes-4-405b", "name": "Nous: Hermes 4 405B", "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756235463 } }, { "id": "nousresearch/hermes-4-70b", "name": "Nous: Hermes 4 70B", "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.38, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756236182 } }, { "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1", "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. 
Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.6, "currency": "USD" }, "output": { "per_million_tokens": 1.7999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744115059 } }, { "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages: Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.39999999999999997, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760101395 } }, { "id": "nvidia/nemotron-nano-12b-v2-vl", "name": "NVIDIA: Nemotron Nano 12B 2 VL", "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines.
With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.19999999999999998, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761675565 } }, { "id": "nvidia/nemotron-nano-9b-v2", "name": "NVIDIA: Nemotron Nano 9B V2", "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.16, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757106807 } }, { "id": "openai/chatgpt-4o-latest", "name": "OpenAI: ChatGPT-4o", "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1723593600 } }, { "id": "openai/codex-mini", "name": "OpenAI: Codex Mini", "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747409761 } }, { "id": "openai/gpt-3.5-turbo", "name": "OpenAI: GPT-3.5 Turbo", "description": "GPT-3.5 Turbo is OpenAI's fastest model. 
It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16385, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1685232000 } }, { "id": "openai/gpt-3.5-turbo-0613", "name": "OpenAI: GPT-3.5 Turbo (older v0613)", "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4095, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1706140800 } }, { "id": "openai/gpt-3.5-turbo-16k", "name": "OpenAI: GPT-3.5 Turbo 16k", "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16385, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1693180800 } }, { "id": "openai/gpt-3.5-turbo-instruct", "name": "OpenAI: GPT-3.5 Turbo Instruct", "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4095, "pricing": { "input": { "per_million_tokens": 1.5, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1695859200 } }, { "id": "openai/gpt-4", "name": "OpenAI: GPT-4", "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8191, "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1685232000 } }, { "id": "openai/gpt-4-0314", "name": "OpenAI: GPT-4 (older v0314)", "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. 
Training data: up to Sep 2021.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8191, "pricing": { "input": { "per_million_tokens": 30, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1685232000 } }, { "id": "openai/gpt-4-1106-preview", "name": "OpenAI: GPT-4 Turbo (older v1106)", "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1699228800 } }, { "id": "openai/gpt-4-turbo", "name": "OpenAI: GPT-4 Turbo", "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1712620800 } }, { "id": "openai/gpt-4-turbo-preview", "name": "OpenAI: GPT-4 Turbo Preview", "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 30, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1706140800 } }, { "id": "openai/gpt-4.1", "name": "OpenAI: GPT-4.1", "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1047576, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744651385 } }, { "id": "openai/gpt-4.1-mini", "name": "OpenAI: GPT-4.1 Mini", "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. 
Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1047576, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 1.5999999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744651381 } }, { "id": "openai/gpt-4.1-nano", "name": "OpenAI: GPT-4.1 Nano", "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1047576, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.39999999999999997, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744651369 } }, { "id": "openai/gpt-4o", "name": "OpenAI: GPT-4o", "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1715558400 } }, { "id": "openai/gpt-4o-2024-05-13", "name": "OpenAI: GPT-4o (2024-05-13)", "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 5, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1715558400 } }, { "id": "openai/gpt-4o-2024-08-06", "name": "OpenAI: GPT-4o (2024-08-06)", "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the response_format.
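A hedged sketch of the structured-outputs mechanism just described, passing a JSON Schema through `response_format`; the schema, field names, and prompt below are illustrative, not part of the catalog entry:

```python
import requests

# Illustrative schema; the model's output is constrained to JSON matching it.
schema = {
    "type": "object",
    "properties": {
        "city": {"type": "string"},
        "population": {"type": "integer"},
    },
    "required": ["city", "population"],
    "additionalProperties": False,
}

body = {
    "model": "openai/gpt-4o-2024-08-06",
    "messages": [{"role": "user", "content": "Largest city in Japan, with population."}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "city_info", "strict": True, "schema": schema},
    },
}

reply = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    json=body,
).json()
print(reply["choices"][0]["message"]["content"])  # a JSON string matching the schema
```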
Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1722902400 } }, { "id": "openai/gpt-4o-2024-11-20", "name": "OpenAI: GPT-4o (2024-11-20)", "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1732127594 } }, { "id": "openai/gpt-4o-audio-preview", "name": "OpenAI: GPT-4o Audio", "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input audio tokens.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755233061 } }, { "id": "openai/gpt-4o-mini", "name": "OpenAI: GPT-4o-mini", "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). 
It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721260800 } }, { "id": "openai/gpt-4o-mini-2024-07-18", "name": "OpenAI: GPT-4o-mini (2024-07-18)", "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1721260800 } }, { "id": "openai/gpt-4o-mini-search-preview", "name": "OpenAI: GPT-4o-mini Search Preview", "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741818122 } }, { "id": "openai/gpt-4o-search-preview", "name": "OpenAI: GPT-4o Search Preview", "description": "GPT-4o Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741817949 } }, { "id": "openai/gpt-4o:extended", "name": "OpenAI: GPT-4o (extended)", "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective.
GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 6, "currency": "USD" }, "output": { "per_million_tokens": 18, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1715558400 } }, { "id": "openai/gpt-5", "name": "OpenAI: GPT-5", "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754587413 } }, { "id": "openai/gpt-5-chat", "name": "OpenAI: GPT-5 Chat", "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754587837 } }, { "id": "openai/gpt-5-codex", "name": "OpenAI: GPT-5 Codex", "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. 
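The `reasoning.effort` knob called out in the GPT-5 Codex entry above is a plain request-body field; a minimal sketch follows, with the prompt and effort level chosen purely for illustration:

```python
import requests

body = {
    "model": "openai/gpt-5-codex",
    "messages": [
        {
            "role": "user",
            "content": "Refactor into a list comprehension:\n"
                       "squares = []\nfor i in range(10):\n    squares.append(i * i)",
        }
    ],
    # Documented knob: trade response latency for deliberation depth.
    "reasoning": {"effort": "low"},  # "low" | "medium" | "high"
}

reply = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    json=body,
).json()
print(reply["choices"][0]["message"]["content"])
```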
Codex is intended specifically for agentic coding applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758643403 } }, { "id": "openai/gpt-5-image", "name": "OpenAI: GPT-5 Image", "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's most advanced language model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760447986 } }, { "id": "openai/gpt-5-image-mini", "name": "OpenAI: GPT-5 Image Mini", "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 2.5, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760624583 } }, { "id": "openai/gpt-5-mini", "name": "OpenAI: GPT-5 Mini", "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754587407 } }, { "id": "openai/gpt-5-nano", "name": "OpenAI: GPT-5 Nano", "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. 
It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 0.049999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 0.39999999999999997, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754587402 } }, { "id": "openai/gpt-5-pro", "name": "OpenAI: GPT-5 Pro", "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 120, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759776663 } }, { "id": "openai/gpt-5.1", "name": "OpenAI: GPT-5.1", "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763060305 } }, { "id": "openai/gpt-5.1-chat", "name": "OpenAI: GPT-5.1 Chat", "description": "GPT-5.1 Chat (AKA Instant) is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning.
GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763060302 } }, { "id": "openai/gpt-5.1-codex", "name": "OpenAI: GPT-5.1-Codex", "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 1.25, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763060298 } }, { "id": "openai/gpt-5.1-codex-mini", "name": "OpenAI: GPT-5.1-Codex-Mini", "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 400000, "pricing": { "input": { "per_million_tokens": 0.25, "currency": "USD" }, "output": { "per_million_tokens": 2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763057820 } }, { "id": "openai/gpt-oss-120b", "name": "OpenAI: gpt-oss-120b", "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. 
The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754414231 } }, { "id": "openai/gpt-oss-120b:exacto", "name": "OpenAI: gpt-oss-120b (exacto)", "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.19999999999999998, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754414231 } }, { "id": "openai/gpt-oss-20b", "name": "OpenAI: gpt-oss-20b", "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.14, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754414229 } }, { "id": "openai/gpt-oss-safeguard-20b", "name": "OpenAI: gpt-oss-safeguard-20b", "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.075, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761752836 } }, { "id": "openai/o1", "name": "OpenAI: o1", "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. 
They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 15, "currency": "USD" }, "output": { "per_million_tokens": 60, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1734459999 } }, { "id": "openai/o1-pro", "name": "OpenAI: o1-pro", "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 150, "currency": "USD" }, "output": { "per_million_tokens": 600, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1742423211 } }, { "id": "openai/o3", "name": "OpenAI: o3", "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744823457 } }, { "id": "openai/o3-deep-research", "name": "OpenAI: o3 Deep Research", "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 10, "currency": "USD" }, "output": { "per_million_tokens": 40, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760129661 } }, { "id": "openai/o3-mini", "name": "OpenAI: o3 Mini", "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. 
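Per the o3-mini description that follows, the same model can be driven at three effort levels either explicitly via `reasoning_effort` or implicitly via the `openai/o3-mini-high` slug; a small sketch of the two equivalent requests (prompt is illustrative):

```python
import requests

# Explicit: set reasoning_effort on the base slug ("medium" is the default).
explicit = {
    "model": "openai/o3-mini",
    "messages": [{"role": "user", "content": "Prove that sqrt(2) is irrational."}],
    "reasoning_effort": "high",  # "low" | "medium" | "high"
}

# Implicit: the -high slug defaults the parameter to "high" on its own.
aliased = {k: v for k, v in explicit.items() if k != "reasoning_effort"}
aliased["model"] = "openai/o3-mini-high"

for body in (explicit, aliased):
    reply = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers={"Authorization": "Bearer <YOUR_API_KEY>"},
        json=body,
    ).json()
    print(body["model"], "->", reply["choices"][0]["message"]["content"][:120])
```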
With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738351721 } }, { "id": "openai/o3-mini-high", "name": "OpenAI: o3 Mini High", "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1739372611 } }, { "id": "openai/o3-pro", "name": "OpenAI: o3 Pro", "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 20, "currency": "USD" }, "output": { "per_million_tokens": 80, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749598352 } }, { "id": "openai/o4-mini", "name": "OpenAI: o4 Mini", "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. 
Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744820942 } }, { "id": "openai/o4-mini-deep-research", "name": "OpenAI: o4 Mini Deep Research", "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760129642 } }, { "id": "openai/o4-mini-high", "name": "OpenAI: o4 Mini High", "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 4.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744824212 } }, { "id": "opengvlab/internvl3-78b", "name": "OpenGVLab: InternVL3 78B", "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. 
Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.26, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757962555 } }, { "id": "openrouter/auto", "name": "Auto Router", "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nThe meta-model is powered by [Not Diamond](https://docs.notdiamond.ai/docs/how-not-diamond-works). Learn more in our [docs](/docs/model-routing).\n\nRequests will be routed to the following models:\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-4.1-nano](/openai/gpt-4.1-nano)\n- [openai/gpt-4.1](/openai/gpt-4.1)\n- [openai/gpt-4.1-mini](/openai/gpt-4.1-mini)\n- [openai/gpt-4o-mini](/openai/gpt-4o-mini)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-opus-4-1](/anthropic/claude-opus-4-1)\n- [anthropic/claude-sonnet-4-0](/anthropic/claude-sonnet-4-0)\n- [anthropic/claude-3-7-sonnet-latest](/anthropic/claude-3-7-sonnet-latest)\n- [google/gemini-2.5-pro](/google/gemini-2.5-pro)\n- [google/gemini-2.5-flash](/google/gemini-2.5-flash)\n- [mistral/mistral-large-latest](/mistral/mistral-large-latest)\n- [mistral/mistral-medium-latest](/mistral/mistral-medium-latest)\n- [mistral/mistral-small-latest](/mistral/mistral-small-latest)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [perplexity/sonar](/perplexity/sonar)\n- [cohere/command-r-plus](/cohere/command-r-plus)\n- [cohere/command-r](/cohere/command-r)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "metadata": { "source": "api", "tags": [], "created": 1699401600 } }, { "id": "openrouter/bert-nebulon-alpha", "name": "Bert-Nebulon Alpha", "description": "This is a cloaked model provided to the community to gather feedback. A general-purpose multimodal model (text/image in, text out) designed for reliability, long-context comprehension, and adaptive logic.
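For the Auto Router entry above, the routed target comes back in the response's `model` attribute, as its description notes; a minimal sketch (prompt is illustrative):

```python
import requests

reply = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    json={
        "model": "openrouter/auto",
        "messages": [{"role": "user", "content": "Summarize the CAP theorem in two sentences."}],
    },
).json()

# Billing follows whichever model the meta-model picked.
print("routed to:", reply["model"])
print(reply["choices"][0]["message"]["content"])
```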
It is engineered for production-grade assistants, retrieval-augmented systems, science workloads, and complex agentic workflows.\n\n**Note:** All prompts and completions for this model are logged by the provider and may be used to improve the model.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "metadata": { "source": "api", "tags": [], "created": 1764005058 } }, { "id": "perplexity/sonar", "name": "Perplexity: Sonar", "description": "Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources. It is designed for companies seeking to integrate lightweight question-and-answer features optimized for speed.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 127072, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738013808 } }, { "id": "perplexity/sonar-deep-research", "name": "Perplexity: Sonar Deep Research", "description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers information. This enables comprehensive report generation across domains like finance, technology, health, and current events.\n\nNotes on Pricing ([Source](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-deep-research)) \n- Input tokens consist of Prompt tokens (user prompt) + Citation tokens (these are processed tokens from running searches)\n- Deep Research runs multiple searches to conduct exhaustive research. Searches are priced at $5/1000 searches. A request that does 30 searches will cost $0.15 in this step.\n- Reasoning is a distinct step in Deep Research since it does extensive automated reasoning through all the material it gathers during its research phase. Reasoning tokens here are a bit different from the CoTs in the answer - these are tokens that we use to reason through the research material prior to generating the outputs via the CoTs. Reasoning tokens are priced at $3/1M tokens", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741311246 } }, { "id": "perplexity/sonar-pro", "name": "Perplexity: Sonar Pro", "description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nFor enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like twice as many citations per search as Sonar on average. Plus, with a larger context window, it can handle longer and more nuanced searches and follow-up questions. 
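\n\nAs a rough illustration only (a sketch assuming OpenRouter's standard chat-completions endpoint, `https://openrouter.ai/api/v1/chat/completions`, and request shape; the question text is a placeholder):\n\n```json\n{\n  \"model\": \"perplexity/sonar-pro\",\n  \"messages\": [\n    { \"role\": \"user\", \"content\": \"What did regulators announce about open-banking APIs this month?\" }\n  ]\n}\n```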
", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741312423 } }, { "id": "perplexity/sonar-pro-search", "name": "Perplexity: Sonar Pro Search", "description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based on tokens plus $18 per thousand requests. This model powers the Pro Search mode on the Perplexity platform.\n\nSonar Pro Search adds autonomous, multi-step reasoning to Sonar Pro. So, instead of just one query + synthesis, it plans and executes entire research workflows using tools.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761854366 } }, { "id": "perplexity/sonar-reasoning", "name": "Perplexity: Sonar Reasoning", "description": "Sonar Reasoning is a reasoning model provided by Perplexity based on [DeepSeek R1](/deepseek/deepseek-r1).\n\nIt allows developers to utilize long chain of thought with built-in web search. Sonar Reasoning is uncensored and hosted in US datacenters. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 127000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738131107 } }, { "id": "perplexity/sonar-reasoning-pro", "name": "Perplexity: Sonar Reasoning Pro", "description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro)\n\nSonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). Designed for advanced use cases, it supports in-depth, multi-step queries with a larger context window and can surface more citations per search, enabling more comprehensive and extensible responses.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741313308 } }, { "id": "prime-intellect/intellect-3", "name": "Prime Intellect: INTELLECT-3", "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. 
Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 1.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1764212534 } }, { "id": "qwen/qwen-2.5-72b-instruct", "name": "Qwen2.5 72B Instruct", "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g., tables), and generating structured outputs, especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.07, "currency": "USD" }, "output": { "per_million_tokens": 0.26, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1726704000 } }, { "id": "qwen/qwen-2.5-7b-instruct", "name": "Qwen: Qwen2.5 7B Instruct", "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g., tables), and generating structured outputs, especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1729036800 } }, { "id": "qwen/qwen-2.5-coder-32b-instruct", "name": "Qwen2.5 Coder 32B Instruct", "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). 
Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significant improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**, not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.11, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731368400 } }, { "id": "qwen/qwen-2.5-vl-7b-instruct", "name": "Qwen: Qwen2.5-VL 7B Instruct", "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724803200 } }, { "id": "qwen/qwen-max", "name": "Qwen: Qwen-Max", "description": "Qwen-Max, based on Qwen2.5, provides the best inference performance among [Qwen models](/qwen), especially for complex multi-step tasks. It's a large-scale MoE model that has been pretrained on over 20 trillion tokens and further post-trained with curated Supervised Fine-Tuning (SFT) and Reinforcement Learning from Human Feedback (RLHF) methodologies. 
The parameter count is unknown.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 1.6, "currency": "USD" }, "output": { "per_million_tokens": 6.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738402289 } }, { "id": "qwen/qwen-plus", "name": "Qwen: Qwen-Plus", "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738409840 } }, { "id": "qwen/qwen-plus-2025-07-28", "name": "Qwen: Qwen Plus 0728", "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757347599 } }, { "id": "qwen/qwen-plus-2025-07-28:thinking", "name": "Qwen: Qwen Plus 0728 (thinking)", "description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757347599 } }, { "id": "qwen/qwen-turbo", "name": "Qwen: Qwen-Turbo", "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1000000, "pricing": { "input": { "per_million_tokens": 0.05, "currency": "USD" }, "output": { "per_million_tokens": 0.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738410974 } }, { "id": "qwen/qwen-vl-max", "name": "Qwen: Qwen VL Max", "description": "Qwen VL Max is a visual understanding model with a 7,500-token context length. It excels in delivering optimal performance for a broader spectrum of complex tasks.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.8, "currency": "USD" }, "output": { "per_million_tokens": 3.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738434304 } }, { "id": "qwen/qwen-vl-plus", "name": "Qwen: Qwen VL Plus", "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. 
It delivers strong performance across a broad range of visual tasks.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 7500, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 0.63, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738731255 } }, { "id": "qwen/qwen2.5-coder-7b-instruct", "name": "Qwen: Qwen2.5 Coder 7B Instruct", "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.03, "currency": "USD" }, "output": { "per_million_tokens": 0.09, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744734887 } }, { "id": "qwen/qwen3-235b-a22b", "name": "Qwen: Qwen3 235B A22B", "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 0.54, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1745875757 } }, { "id": "qwen/qwen3-235b-a22b-2507", "name": "Qwen: Qwen3 235B A22B Instruct 2507", "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" (`<think>` blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. 
It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.072, "currency": "USD" }, "output": { "per_million_tokens": 0.464, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753119555 } }, { "id": "qwen/qwen3-235b-a22b-thinking-2507", "name": "Qwen: Qwen3 235B A22B Thinking 2507", "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode (`</think>`) and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.11, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753449557 } }, { "id": "qwen/qwen3-30b-a3b-instruct-2507", "name": "Qwen: Qwen3 30B A3B Instruct 2507", "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.08, "currency": "USD" }, "output": { "per_million_tokens": 0.33, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753806965 } }, { "id": "qwen/qwen3-30b-a3b-thinking-2507", "name": "Qwen: Qwen3 30B A3B Thinking 2507", "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. 
With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.09, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756399192 } }, { "id": "qwen/qwen3-coder", "name": "Qwen: Qwen3 Coder 480B A35B", "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.22, "currency": "USD" }, "output": { "per_million_tokens": 0.95, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753230546 } }, { "id": "qwen/qwen3-coder-30b-a3b-instruct", "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.06, "currency": "USD" }, "output": { "per_million_tokens": 0.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753972379 } }, { "id": "qwen/qwen3-coder-flash", "name": "Qwen: Qwen3 Coder Flash", "description": "Qwen3 Coder Flash is Alibaba's fast and cost-efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758115536 } }, { "id": "qwen/qwen3-coder-plus", "name": "Qwen: Qwen3 Coder Plus", "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the open-source Qwen3 Coder 480B A35B. 
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1, "currency": "USD" }, "output": { "per_million_tokens": 5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758662707 } }, { "id": "qwen/qwen3-coder:exacto", "name": "Qwen: Qwen3 Coder 480B A35B (exacto)", "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.38, "currency": "USD" }, "output": { "per_million_tokens": 1.53, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753230546 } }, { "id": "qwen/qwen3-max", "name": "Qwen: Qwen3 Max", "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It delivers higher accuracy in math, coding, logic, and science tasks, follows complex instructions in Chinese and English more reliably, reduces hallucinations, and produces higher-quality responses for open-ended Q&A, writing, and conversation. The model supports over 100 languages with stronger translation and commonsense reasoning, and is optimized for retrieval-augmented generation (RAG) and tool calling, though it does not include a dedicated “thinking” mode.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 1.2, "currency": "USD" }, "output": { "per_million_tokens": 6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758662808 } }, { "id": "qwen/qwen3-next-80b-a3b-instruct", "name": "Qwen: Qwen3 Next 80B A3B Instruct", "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. 
It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757612213 } }, { "id": "qwen/qwen3-next-80b-a3b-thinking", "name": "Qwen: Qwen3 Next 80B A3B Thinking", "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems (math proofs, code synthesis/debugging, logic, and agentic planning) and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.12, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1757612284 } }, { "id": "qwen/qwen3-vl-235b-a22b-instruct", "name": "Qwen: Qwen3 VL 235B A22B Instruct", "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. 
This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 1.9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758668687 } }, { "id": "qwen/qwen3-vl-235b-a22b-thinking", "name": "Qwen: Qwen3 VL 235B A22B Thinking", "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758668690 } }, { "id": "qwen/qwen3-vl-30b-a3b-instruct", "name": "Qwen: Qwen3 VL 30B A3B Instruct", "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "pricing": { "input": { "per_million_tokens": 0.15, "currency": "USD" }, "output": { "per_million_tokens": 0.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759794476 } }, { "id": "qwen/qwen3-vl-30b-a3b-thinking", "name": "Qwen: Qwen3 VL 30B A3B Thinking", "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. 
It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.16, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759794479 } }, { "id": "qwen/qwen3-vl-8b-instruct", "name": "Qwen: Qwen3 VL 8B Instruct", "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.064, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760463308 } }, { "id": "qwen/qwen3-vl-8b-thinking", "name": "Qwen: Qwen3 VL 8B Thinking", "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and long-context processing (native 256K, expandable to 1M tokens) for tasks such as scientific visual analysis, causal inference, and mathematical reasoning over image or video inputs.\n\nCompared to the Instruct edition, the Thinking version introduces deeper visual-language fusion and deliberate reasoning pathways that improve performance on long-chain logic tasks, STEM problem-solving, and multi-step video understanding. 
It achieves stronger temporal grounding via Interleaved-MRoPE and timestamp-aware embeddings, while maintaining robust OCR, multilingual comprehension, and text generation on par with large text-only LLMs.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.18, "currency": "USD" }, "output": { "per_million_tokens": 2.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1760463746 } }, { "id": "raifle/sorcererlm-8x22b", "name": "SorcererLM 8x22B", "description": "SorcererLM is an advanced RP and storytelling model, built as a low-rank 16-bit LoRA fine-tune of [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16000, "pricing": { "input": { "per_million_tokens": 4.5, "currency": "USD" }, "output": { "per_million_tokens": 4.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731105083 } }, { "id": "relace/relace-apply-3", "name": "Relace: Relace Apply 3", "description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at 10,000 tokens/sec on average.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Relace. Learn more about this model in their [documentation](https://docs.relace.ai/api-reference/instant-apply/apply)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.85, "currency": "USD" }, "output": { "per_million_tokens": 1.25, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758891572 } }, { "id": "sao10k/l3-euryale-70b", "name": "Sao10k: Llama 3 Euryale 70B v2.1", "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 1.48, "currency": "USD" }, "output": { "per_million_tokens": 1.48, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1718668800 } }, { "id": "sao10k/l3-lunaris-8b", "name": "Sao10K: Llama 3 8B Lunaris", "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. 
It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 0.04, "currency": "USD" }, "output": { "per_million_tokens": 0.05, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1723507200 } }, { "id": "sao10k/l3.1-70b-hanami-x1", "name": "Sao10K: Llama 3.1 70B Hanami x1", "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1736302854 } }, { "id": "sao10k/l3.1-euryale-70b", "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.65, "currency": "USD" }, "output": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724803200 } }, { "id": "sao10k/l3.3-euryale-70b", "name": "Sao10K: Llama 3.3 Euryale 70B", "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.65, "currency": "USD" }, "output": { "per_million_tokens": 0.75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1734535928 } }, { "id": "switchpoint/router", "name": "Switchpoint Router", "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.85, "currency": "USD" }, "output": { "per_million_tokens": 3.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1752272899 } }, { "id": "thedrummer/anubis-70b-v1.1", "name": "TheDrummer: Anubis 70B V1.1", "description": "TheDrummer's Anubis v1.1 is an unaligned, creative Llama 3.3 70B model focused on providing character-driven roleplay & stories. 
It excels at gritty, visceral prose, unique character adherence, and coherent narratives, while maintaining the instruction following Llama 3.3 70B is known for.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.75, "currency": "USD" }, "output": { "per_million_tokens": 1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1751208347 } }, { "id": "thedrummer/cydonia-24b-v4.1", "name": "TheDrummer: Cydonia 24B V4.1", "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758931878 } }, { "id": "thedrummer/rocinante-12b", "name": "TheDrummer: Rocinante 12B", "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.17, "currency": "USD" }, "output": { "per_million_tokens": 0.43, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1727654400 } }, { "id": "thedrummer/skyfall-36b-v2", "name": "TheDrummer: Skyfall 36B V2", "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.5, "currency": "USD" }, "output": { "per_million_tokens": 0.8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741636566 } }, { "id": "thedrummer/unslopnemo-12b", "name": "TheDrummer: UnslopNemo 12B", "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.4, "currency": "USD" }, "output": { "per_million_tokens": 0.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731103448 } }, { "id": "tngtech/deepseek-r1t2-chimera", "name": "TNG: DeepSeek R1T2 Chimera", "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20% faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. 
The checkpoint supports contexts up to 60k tokens in standard use (tested to ~130k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1751986985 } }, { "id": "tngtech/tng-r1t-chimera", "name": "TNG: R1T Chimera", "description": "TNG-R1T-Chimera is an experimental LLM with a fondness for creative storytelling and character interaction. It is a derivative of the original TNG/DeepSeek-R1T-Chimera released in April 2025 and is available exclusively via Chutes and OpenRouter.\n\nCharacteristics and improvements include:\n\n- We think that it has a creative and pleasant personality.\n- It has a preliminary EQ-Bench3 value of about 1305.\n- It is quite a bit more intelligent than the original, albeit slightly slower.\n- It is much more think-token consistent, i.e. reasoning and answer blocks are properly delineated.\n- Tool calling is much improved.\n\nTNG Tech, the model authors, ask that users follow the careful guidelines that Microsoft has created for their \"MAI-DS-R1\" DeepSeek-based model. These guidelines are available on Hugging Face (https://huggingface.co/microsoft/MAI-DS-R1).", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 1.2, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1764184161 } }, { "id": "undi95/remm-slerp-l2-13b", "name": "ReMM SLERP 13B", "description": "A recreation trial of the original MythoMax-L2-13B but with updated models. #merge", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 6144, "pricing": { "input": { "per_million_tokens": 0.45, "currency": "USD" }, "output": { "per_million_tokens": 0.65, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1689984000 } }, { "id": "x-ai/grok-3", "name": "xAI: Grok 3", "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749582908 } }, { "id": "x-ai/grok-3-beta", "name": "xAI: Grok 3 Beta", "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\nExcels in structured tasks and benchmarks like GPQA, LCB, and MMLU-Pro where it outperforms Grok 3 Mini even at high reasoning effort. \n\nNote that there are two xAI endpoints for this model. By default when using this model, we will always route you to the base endpoint. 
If you want the fast endpoint, you can add `provider: { sort: throughput }` to sort by throughput instead.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744240068 } }, { "id": "x-ai/grok-3-mini", "name": "xAI: Grok 3 Mini", "description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1749583245 } }, { "id": "x-ai/grok-3-mini-beta", "name": "xAI: Grok 3 Mini Beta", "description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand extensive domain knowledge, and shines in math-specific and quantitative use cases, such as solving challenging puzzles or math problems.\n\nTransparent \"thinking\" traces are accessible. It defaults to low reasoning effort, which can be boosted by setting `reasoning: { effort: \"high\" }`.\n\nNote that there are two xAI endpoints for this model. By default when using this model, we will always route you to the base endpoint. If you want the fast endpoint, you can add `provider: { sort: throughput }` to sort by throughput instead.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.3, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1744240195 } }, { "id": "x-ai/grok-4", "name": "xAI: Grok 4", "description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not exposed, reasoning cannot be disabled, and the reasoning effort cannot be specified. Pricing increases once the total token count in a given request exceeds 128k. See more details on the [xAI docs](https://docs.x.ai/docs/models/grok-4-0709)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 15, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1752087689 } }, { "id": "x-ai/grok-4-fast", "name": "xAI: Grok 4 Fast", "description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model on xAI's [news post](http://x.ai/news/grok-4-fast).\n\nReasoning can be enabled/disabled using the `reasoning` `enabled` parameter in the API. 
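For example, a minimal sketch of a request body that turns reasoning off (assuming the standard OpenRouter chat-completions request shape; the prompt is a placeholder):\n\n```json\n{\n  \"model\": \"x-ai/grok-4-fast\",\n  \"reasoning\": { \"enabled\": false },\n  \"messages\": [\n    { \"role\": \"user\", \"content\": \"Summarize this changelog in two sentences.\" }\n  ]\n}\n```\n\n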
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 0.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1758240090 } }, { "id": "x-ai/grok-4.1-fast", "name": "xAI: Grok 4.1 Fast", "description": "Grok 4.1 Fast is xAI's best agentic tool-calling model that shines in real-world use cases like customer support and deep research. 2M context window.\n\nReasoning can be enabled/disabled using the `reasoning` `enabled` parameter in the API. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#controlling-reasoning-tokens)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "metadata": { "source": "api", "tags": [], "created": 1763587502 } }, { "id": "x-ai/grok-code-fast-1", "name": "xAI: Grok Code Fast 1", "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality workflows.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256000, "pricing": { "input": { "per_million_tokens": 0.2, "currency": "USD" }, "output": { "per_million_tokens": 1.5, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1756238927 } }, { "id": "z-ai/glm-4-32b", "name": "Z.AI: GLM 4 32B ", "description": "GLM 4 32B is a cost-effective foundation language model.\n\nIt can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks.\n\nIt is made by the same lab behind the thudm models.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.1, "currency": "USD" }, "output": { "per_million_tokens": 0.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753376617 } }, { "id": "z-ai/glm-4.5", "name": "Z.AI: GLM 4.5", "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. 
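For instance, a sketch of a request that explicitly enables thinking mode (assuming the standard OpenRouter chat-completions request shape; the prompt is a placeholder):\n\n```json\n{\n  \"model\": \"z-ai/glm-4.5\",\n  \"reasoning\": { \"enabled\": true },\n  \"messages\": [\n    { \"role\": \"user\", \"content\": \"Plan the steps and tool calls needed to migrate this cron job to a message queue.\" }\n  ]\n}\n```\n\n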
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.35, "currency": "USD" }, "output": { "per_million_tokens": 1.55, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753471347 } }, { "id": "z-ai/glm-4.5-air", "name": "Z.AI: GLM 4.5 Air", "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 0.10400000000000001, "currency": "USD" }, "output": { "per_million_tokens": 0.6799999999999999, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1753471258 } }, { "id": "z-ai/glm-4.5v", "name": "Z.AI: GLM 4.5V", "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 0.48, "currency": "USD" }, "output": { "per_million_tokens": 1.44, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1754922288 } }, { "id": "z-ai/glm-4.6", "name": "Z.AI: GLM 4.6", "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code, Cline, Roo Code, and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool-using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 202752, "pricing": { "input": { "per_million_tokens": 0.39999999999999997, "currency": "USD" }, "output": { "per_million_tokens": 1.75, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759235576 } }, { "id": "z-ai/glm-4.6:exacto", "name": "Z.AI: GLM 4.6 (exacto)", "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code, Cline, Roo Code, and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool-using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 204800, "pricing": { "input": { "per_million_tokens": 0.44, "currency": "USD" }, "output": { "per_million_tokens": 1.76, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1759235576 } }, { "id": "deepseek/deepseek-v3.2", "name": "deepseek/deepseek-v3.2", "description": "DeepSeek-V3.2 is a leading model that stands out in efficient inference, complex reasoning, and agentic scenarios. Built on the DeepSeek Sparse Attention (DSA) mechanism, it optimizes long-context performance while significantly reducing compute overhead. Through a scalable reinforcement learning framework its overall capability reaches the GPT-5 tier, and the high-compute variant V3.2-Speciale approaches Gemini-3.0-Pro in reasoning performance. Backed by a large-scale agentic task synthesis pipeline, the model offers stronger tool calling and multi-step decision making, and achieved gold-medal-level results at the 2025 IMO and IOI. As a MaaS platform, we have deeply adapted DeepSeek-V3.2 with dynamic scheduling, batched acceleration, low-latency inference, and enterprise-grade SLA guarantees, further improving its stability, cost-effectiveness, and controllability in enterprise production environments. It suits high-value scenarios such as search, Q&A, agents, coding, and data processing.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 65536, "metadata": { "source": "api", "tags": [], "created": 1764589183 } }, { "id": "paddlepaddle/paddleocr-vl", "name": "paddlepaddle/paddleocr-vl", "description": "Baidu's PaddleOCR-VL tops the global OmniDocBench leaderboard with a small 0.9B-parameter model, scoring best-in-class across all four core capabilities and covering 109 languages. Its two-stage design ('decompose first, then recognize') combines a dynamic-resolution vision encoder with a lightweight language model to be both fast and accurate, processing 1,881 tokens per second. Trained on 30 million samples with automated annotation, it is an all-round document parsing champion.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 16384, "metadata": { "source": "api", "tags": [], "created": 1761531849 } }, { "id": "deepseek/deepseek-ocr", "name": "deepseek/deepseek-ocr", "description": "DeepSeek-OCR is a multimodal model released by DeepSeek AI on October 20, 2025, built to explore the limits of vision-text compression, with a focus on document recognition and image-to-text scenarios. The model pairs a DeepEncoder with a DeepSeek3B-MoE-A570M decoder for a total of roughly 3B parameters, and can render long text into highly compressed images: OCR accuracy reaches 97% at 10x lossless compression and remains around 60% at a 20x compression ratio.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 8192, "metadata": { "source": "api", "tags": [], "created": 1761359410 } }, { "id": "zai-org/glm-4.6v", "name": "zai-org/glm-4.6v", "description": "Zhipu's latest visual reasoning model. It reaches SOTA visual-understanding accuracy among models of the same scale, natively supports tool calling so it can complete tasks autonomously, handles an ultra-long 128K context, and lets you flexibly switch its thinking capability on or off.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 32768, "metadata": { "source": "api", "tags": [], "created": 1765205060 } }, { "id": "zai-org/glm-4.6", "name": "zai-org/glm-4.6", "description": "GLM-4.6 is Zhipu's latest flagship model, with 355B total parameters and 32B activated, and a context window expanded to 200K. It improves across eight authoritative benchmarks, holding first place among domestic Chinese models, and surpasses GLM-4.5 in core capabilities including coding, reasoning, search, writing, and agentic applications.\n", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 131072, "metadata": { "source": "api", "tags": [], "created": 1759220209 } }, { "id": "kat-coder", "name": "kat-coder", "description": "KAT-Coder-Pro V1 is the most advanced agentic coding model in the KwaiKAT series. Purpose-built for agentic coding, it comprehensively covers real-world programming tasks and scenarios; large-scale agentic reinforcement learning yields emergent intelligent behavior, and its code-writing performance significantly surpasses comparable models, validated through rigorous testing by thousands of internal engineers. It achieves a 73.4% resolve rate on the SWE-Bench Verified coding benchmark, demonstrating top-tier programming performance.\nThe model is optimized through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic reinforcement learning (agentic RL), which markedly improves its tool use, multi-turn interaction, instruction following, generalization, and overall capability.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 32000, "metadata": { "source": "api", "tags": [], "created": 1759237231 } }, { "id": "deepseek/deepseek-v3-0324", "name": "deepseek/deepseek-v3-0324", "description": "DeepSeek V3 0324 is the latest release in the DeepSeek team's flagship chat model series, built on a Mixture-of-Experts (MoE) architecture with 685B parameters.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 163840, "metadata": { "source": "api", "tags": [], "created": 1742959439 } }, { "id": "deepseek/deepseek-v3.1", "name": "deepseek/deepseek-v3.1", "description": "DeepSeek-V3.1 is DeepSeek's latest model, supporting hybrid thinking and non-thinking modes. It is post-trained on DeepSeek-V3.1-Base, which was developed from the original V3 base checkpoint through a two-phase long-context extension approach following the methodology outlined in the original DeepSeek-V3 report. We significantly expanded the training dataset by collecting additional long documents and substantially scaled up both training phases: the 32K context extension phase grew 10x to 630B tokens, and the 128K extension phase grew 3.3x to 209B tokens.", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 32768, "metadata": { "source": "api", "tags": [], "created": 1755759184 } }, { "id": "qwen/qwen3-coder-480b-a35b-instruct", "name": 
"qwen/qwen3-coder-480b-a35b-instruct", "description": "Qwen3-Coder-480B-A35B-Instruct 是由Qwen推出的尖端开源编程模型,在智能体编程(Agentic Coding)、浏览器自动化及核心开发任务中达到与Claude Sonnet同等的性能水平。该模型原生支持256K上下文窗口(通过YaRN技术可扩展至1M token),擅长仓库级代码分析,并针对Qwen Code、CLINE等平台设计了专用函数调用协议——使其成为复杂实际开发工作流的理想选择。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 65536, "metadata": { "source": "api", "tags": [], "created": 1753233447 } }, { "id": "moonshotai/kimi-k2-instruct", "name": "moonshotai/kimi-k2-instruct", "description": "Kimi K2 是一种最先进的专家混合 (MoE) 语言模型,具有 320 亿个激活参数和 1 万亿个总参数。使用 Muon 优化器进行训练,Kimi K2 在前沿知识、推理和编码任务方面实现了卓越的性能,同时针对代理功能进行了精心优化,专为工具使用、推理和自主解决问题而设计。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 128000, "metadata": { "source": "api", "tags": [], "created": 1752263379 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507", "name": "qwen/qwen3-235b-a22b-instruct-2507", "description": "Qwen3-235B-A22B-Instruct-2507 是基于 Qwen3-235B 架构的多语言指令微调混合专家语言模型,每次前向推理激活 220 亿参数。该模型针对通用文本生成任务优化,涵盖指令遵循、逻辑推理、数学计算、代码生成及工具调用等能力。其原生支持 26.2 万 token 的超长上下文窗口,且未采用 \"\" 思维链显式标注模式。\n相较于基础版本,本版本在知识覆盖广度、长文本推理能力、编程基准测试以及开放性任务对齐度等方面实现显著提升。模型尤其擅长多语言理解、数学推理(如美国数学邀请赛 AIME、哈佛-麻省理工数学锦标赛 HMMT),并在 Arena-Hard 综合评测与写作专项评测 WritingBench 中表现优异。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 260000, "metadata": { "source": "api", "tags": [], "created": 1753176332 } }, { "id": "baichuan/baichuan-m2-32b", "name": "baichuan/baichuan-m2-32b", "description": "百川M2是一款专为真实医疗场景设计的智能推理模型。我们从实际医疗问题出发,基于大规模验证系统进行强化学习训练,在保持通用能力的同时,实现了医疗推理能力的突破性提升。作为当前全球领先的开源医疗大模型,百川M2在HealthBench医疗基准测试中表现优异:超越所有开源模型(包括GPT-OSS-120B),优于众多前沿闭源模型,是目前最接近GPT-5医疗能力的开源模型\n实践证明:强大的验证系统是连接模型能力与现实应用的关键,端到端的强化学习方法能显著提升医疗推理能力\n百川M2的发布,标志着医疗人工智能领域的技术突破,为智慧医疗发展提供了全新可能。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 131072, "metadata": { "source": "api", "tags": [], "created": 1755105023 } }, { "id": "zai-org/glm-4.5v", "name": "zai-org/glm-4.5v", "description": "Z.ai推出的GLM-4.5V视觉推理模型树立了行业新标杆,在42项基准测试中均达到同规模开源模型的最高水平。该模型不仅擅长基准测试,更通过混合训练技术在真实场景中展现出卓越性能,具备全方位的视觉理解能力——包括图像/视频分析、图形界面交互、复杂文档处理以及精准的视觉元素定位等核心功能。\n在中国GeoGuessr地理定位挑战赛中,GLM-4.5V仅用16小时就超越了99%的人类选手(共21000人参与),一周内攀升至第66名。该模型基于GLM-4.5-Air基础架构开发,继承了GLM-4.1V-Thinking的技术路线,采用1060亿参数的混合专家(MoE)架构实现高效扩展。作为连接前沿AI研究与实际应用的桥梁,GLM-4.5V正在重新定义视觉智能的行业标准", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 16384, "metadata": { "source": "api", "tags": [], "created": 1754915492 } }, { "id": "zai-org/glm-4.5", "name": "zai-org/glm-4.5", "description": "GLM-4.5系列模型是专为智能体(Agent)研发的基座模型。旗舰款GLM-4.5拥有3550亿总参数(320亿激活参数),通过融合推理、编程与智能体能力,满足复杂场景需求。\n作为混合推理系统,它提供双工作模式:\n- 思考模式:支持复杂推理、工具调用和策略规划\n- 即时响应模式:实现低延迟交互,快速生成反馈\n该架构在保持高性能的同时,为动态智能体环境提供自适应解决方案。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 98304, "metadata": { "source": "api", "tags": [], "created": 1753710159 } }, { "id": "qwen/qwen3-235b-a22b-fp8", "name": "qwen/qwen3-235b-a22b-fp8", "description": "实现推理模式和非推理模式的有效融合,可在对话中切换模式。推理能力显著超过QwQ、通用能力显著超过Qwen2.5-72B-Instruct,达到同规模业界SOTA水平。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 20000, "metadata": { 
"source": "api", "tags": [], "created": 1745898441 } }, { "id": "qwen/qwen3-32b-fp8", "name": "qwen/qwen3-32b-fp8", "description": "实现推理模式和非推理模式的有效融合,可在对话中切换模式。推理能力显著超过QwQ、通用能力显著超过Qwen2.5-32B-Instruct,达到同规模业界SOTA水平。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 20000, "metadata": { "source": "api", "tags": [], "created": 1745898548 } }, { "id": "qwen/qwen3-30b-a3b-fp8", "name": "qwen/qwen3-30b-a3b-fp8", "description": "实现推理模式和非推理模式的有效融合,可在对话中切换模式。推理能力以更小参数规模比肩QwQ-32B、通用能力显著超过Qwen2.5-14B,达到同规模业界SOTA水平。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 20000, "metadata": { "source": "api", "tags": [], "created": 1745898496 } }, { "id": "deepseek/deepseek-prover-v2-671b", "name": "deepseek/deepseek-prover-v2-671b", "description": "Deepseek 全新开源模型 DeepSeek-Prover-V2-671B,专注于数学定理证明任务。该模型基于混合专家 (MoE) 架构,并利用 Lean 4 框架进行形式化推理训练。该模型参数规模达 6710 亿,结合强化学习和大规模合成数据,显著提升了自动化证明能力。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 160000, "metadata": { "source": "api", "tags": [], "created": 1746010764 } }, { "id": "deepseek/deepseek-r1-turbo", "name": "deepseek/deepseek-r1-turbo", "description": "DeepSeek R1 (Turbo)是派欧算力云平台提供的最新高性能DeepSeek R1 模型。DeepSeek R1是DeepSeek团队发布的最新开源模型,具备非常强悍的推理性能,尤其在数学、编程和推理任务上达到了与OpenAI的o1模型相当的水平。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 16000, "metadata": { "source": "api", "tags": [], "created": 1741089539 } }, { "id": "deepseek/deepseek-v3-turbo", "name": "deepseek/deepseek-v3-turbo", "description": "DeepSeek V3 (Turbo) 是派欧算力云平台提供的最新高性能DeepSeek V3 模型。DeepSeek-V3 在推理速度方面实现了比之前模型的重大突破。在开源模型中排名第一,并可与全球最先进的闭源模型相媲美。DeepSeek-V3 采用了多头潜在注意力 (MLA) 和 DeepSeekMoE 架构,这些架构在 DeepSeek-V2 中得到了全面验证。此外,DeepSeek-V3 开创了一种用于负载均衡的辅助无损策略,并设定了多标记预测训练目标以获得更强的性能。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 16000, "metadata": { "source": "api", "tags": [], "created": 1741084845 } }, { "id": "deepseek/deepseek-v3/community", "name": "deepseek/deepseek-v3/community", "description": "DeepSeek-V3在推理速度方面实现了比之前模型的重大突破。在开源模型中排名第一,并可与全球最先进的闭源模型相媲美。DeepSeek-V3 采用了多头潜在注意力 (MLA) 和 DeepSeekMoE 架构,这些架构在 DeepSeek-V2 中得到了全面验证。此外,DeepSeek-V3 开创了一种用于负载均衡的辅助无损策略,并设定了多标记预测训练目标以获得更强的性能。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 4000, "metadata": { "source": "api", "tags": [], "created": 1738928844 } }, { "id": "deepseek/deepseek-r1/community", "name": "deepseek/deepseek-r1/community", "description": "DeepSeek R1是DeepSeek团队发布的最新开源模型,具备非常强悍的推理性能,尤其在数学、编程和推理任务上达到了与OpenAI的o1模型相当的水平。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 4000, "metadata": { "source": "api", "tags": [], "created": 1738928740 } }, { "id": "baidu/ernie-4.5-300b-a47b-paddle", "name": "baidu/ernie-4.5-300b-a47b-paddle", "description": "文心4.5系列开源模型为MoE 架构,是一种创新性的多模态异构模型结构,通过跨模态参数共享机制实现模态间知识融合,同时为各单一模态保留专用参数空间。此架构非常适用于从大语言模型向多模态模型的持续预训练范式,在保持甚至提升文本任务性能的基础上,显著增强多模态理解能力。该模型均使用飞桨深度学习框架进行高效训练、推理和部署。在大语言模型的预训练中,模型FLOPs利用率(MFU)达到47%。实验结果显示,该系列模型在多个文本和多模态基准测试中达到SOTA水平,在指令遵循、世界知识记忆、视觉理解和多模态推理任务上效果尤为突出。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 12000, "metadata": { "source": "api", "tags": [], "created": 1751240593 } }, { "id": "qwen/qwen3-8b-fp8", "name": "qwen/qwen3-8b-fp8", "description": "实现思考模式和非思考模式的有效融合,可在对话中切换模式。推理能力达到同规模业界SOTA水平、通用能力显著超过Qwen2.5-7B。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 20000, "metadata": { "source": "api", "tags": [], "created": 1745902078 } }, { "id": "zai-org/glm-4.5-air", "name": "zai-org/glm-4.5-air", "description": "GLM-4.5-Air 是我们最新旗舰模型系列的轻量化版本,同时专为智能体(Agent)中心应用而设计。与 GLM-4.5 一样,它采用专家混合(Mixture-of-Experts,MoE)架构,但参数规模更加紧凑。\nGLM-4.5-Air 支持混合推理模式,提供两种工作模式:\n- 思考模式(Thinking Mode):用于高级推理和工具调用;\n- 非思考模式(Non-Thinking Mode):用于实时交互与快速响应。\n用户可以通过布尔参数 reasoning_enabled 来控制模型的推理行为", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 98304, "metadata": { "source": "api", "tags": [], "created": 1760424157 } }, { "id": "baidu/ernie-4.5-21b-a3b-thingking", "name": "baidu/ernie-4.5-21b-a3b-thingking", "description": "ERNIE-4.5-21B-A3B-Thinking 是一个文本 MoE 后训练模型,每个 token 有 21B 总参数和 3B 激活参数,提高了推理任务的性能,包括逻辑推理、数学、科学、编码、文本生成和通常需要人类专业知识的学术基准。具备高效的工具使用能力,达到 128K 长上下文理解能力。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 65536, "metadata": { "source": "api", "tags": [], "created": 1758508172 } }, { "id": "baidu/ernie-4.5-0.3b", "name": "baidu/ernie-4.5-0.3b", "description": "文心4.5系列开源模型为MoE 架构,是一种创新性的多模态异构模型结构,通过跨模态参数共享机制实现模态间知识融合,同时为各单一模态保留专用参数空间。此架构非常适用于从大语言模型向多模态模型的持续预训练范式,在保持甚至提升文本任务性能的基础上,显著增强多模态理解能力。该模型均使用飞桨深度学习框架进行高效训练、推理和部署。在大语言模型的预训练中,模型FLOPs利用率(MFU)达到47%。实验结果显示,该系列模型在多个文本和多模态基准测试中达到SOTA水平,在指令遵循、世界知识记忆、视觉理解和多模态推理任务上效果尤为突出。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 8000, "metadata": { "source": "api", "tags": [], "created": 1751258298 } }, { "id": "qwen/qwen3-4b-fp8", "name": "qwen/qwen3-4b-fp8", "description": "实现思考模式和非思考模式的有效融合,可在对话中切换模式。推理能力达到同规模业界SOTA水平、模型人类偏好能力显著增强,创意写作、角色扮演、多轮对话、指令遵循能力均有明显提升,用户体验预期明显更佳。", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "max_output_tokens": 20000, "metadata": { "source": "api", "tags": [], "created": 1745902139 } }, { "id": "deepseek/deepseek-v3.2-251201", "name": "deepseek/deepseek-v3.2-251201", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764587960 } }, { "id": "deepseek/deepseek-v3.2-exp-thinking", "name": "deepseek/deepseek-v3.2-exp-thinking", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759143007 } }, { "id": "deepseek-v3.1", "name": "deepseek-v3.1", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755609557 } }, { "id": "deepseek/deepseek-math-v2", "name": "deepseek/deepseek-math-v2", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764843837 } }, { "id": "doubao-seed-1.6-thinking", "name": "doubao-seed-1.6-thinking", "owned_by": "system", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755242697 } }, { "id": "doubao-seed-1.6-flash", "name": "doubao-seed-1.6-flash", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755242599 } }, { "id": "doubao-seed-1.6", "name": "doubao-seed-1.6", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755241785 } }, { "id": "doubao-1.5-vision-pro", "name": "doubao-1.5-vision-pro", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754380642 } }, { "id": "deepseek/deepseek-v3.2-speciale", "name": "deepseek/deepseek-v3.2-speciale", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764588435 } }, { "id": "deepseek-v3-0324", "name": "deepseek-v3-0324", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754380642 } }, { "id": "deepseek/deepseek-v3.1-terminus-thinking", "name": "deepseek/deepseek-v3.1-terminus-thinking", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758547768 } }, { "id": "qwen2.5-vl-7b-instruct", "name": "qwen2.5-vl-7b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754380642 } }, { "id": "qwen-vl-max-2025-01-25", "name": "qwen-vl-max-2025-01-25", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754380642 } }, { "id": "qwen-max-2025-01-25", "name": "qwen-max-2025-01-25", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754380642 } }, { "id": "01-ai/yi-large", "name": "01-ai/yi-large", "owned_by": "01-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "abacusai/dracarys-llama-3.1-70b-instruct", "name": "abacusai/dracarys-llama-3.1-70b-instruct", "owned_by": "abacusai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "adept/fuyu-8b", "name": "adept/fuyu-8b", "owned_by": "adept", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ai21labs/jamba-1.5-large-instruct", "name": "ai21labs/jamba-1.5-large-instruct", "owned_by": "ai21labs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ai21labs/jamba-1.5-mini-instruct", "name": "ai21labs/jamba-1.5-mini-instruct", "owned_by": "ai21labs", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "aisingapore/sea-lion-7b-instruct", "name": "aisingapore/sea-lion-7b-instruct", "owned_by": "aisingapore", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "baai/bge-m3", "name": "baai/bge-m3", "owned_by": "baai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "baichuan-inc/baichuan2-13b-chat", "name": "baichuan-inc/baichuan2-13b-chat", "owned_by": "baichuan-inc", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "bigcode/starcoder2-15b", "name": "bigcode/starcoder2-15b", "owned_by": "bigcode", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "bigcode/starcoder2-7b", "name": "bigcode/starcoder2-7b", "owned_by": "bigcode", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "bytedance/seed-oss-36b-instruct", "name": "bytedance/seed-oss-36b-instruct", "owned_by": "bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "databricks/dbrx-instruct", "name": "databricks/dbrx-instruct", "owned_by": "databricks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-coder-6.7b-instruct", "name": "deepseek-ai/deepseek-coder-6.7b-instruct", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-r1", "name": "deepseek-ai/deepseek-r1", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-r1-0528", "name": "deepseek-ai/deepseek-r1-0528", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-v3.1", "name": "deepseek-ai/deepseek-v3.1", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-v3.1-terminus", "name": "deepseek-ai/deepseek-v3.1-terminus", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/codegemma-1.1-7b", "name": "google/codegemma-1.1-7b", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/codegemma-7b", "name": "google/codegemma-7b", "owned_by": "google", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/deplot", "name": "google/deplot", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/gemma-2-2b-it", "name": "google/gemma-2-2b-it", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/gemma-2b", "name": "google/gemma-2b", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/gemma-3-1b-it", "name": "google/gemma-3-1b-it", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/gemma-3n-e2b-it", "name": "google/gemma-3n-e2b-it", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/gemma-7b", "name": "google/gemma-7b", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/paligemma", "name": "google/paligemma", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/recurrentgemma-2b", "name": "google/recurrentgemma-2b", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "google/shieldgemma-9b", "name": "google/shieldgemma-9b", "owned_by": "google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "gotocompany/gemma-2-9b-cpt-sahabatai-instruct", "name": "gotocompany/gemma-2-9b-cpt-sahabatai-instruct", "owned_by": "gotocompany", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-3.0-3b-a800m-instruct", "name": "ibm/granite-3.0-3b-a800m-instruct", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-3.0-8b-instruct", "name": "ibm/granite-3.0-8b-instruct", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-3.3-8b-instruct", "name": "ibm/granite-3.3-8b-instruct", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-34b-code-instruct", "name": "ibm/granite-34b-code-instruct", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, 
"metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-8b-code-instruct", "name": "ibm/granite-8b-code-instruct", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "ibm/granite-guardian-3.0-8b", "name": "ibm/granite-guardian-3.0-8b", "owned_by": "ibm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "igenius/colosseum_355b_instruct_16k", "name": "igenius/colosseum_355b_instruct_16k", "owned_by": "igenius", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "igenius/italia_10b_instruct_16k", "name": "igenius/italia_10b_instruct_16k", "owned_by": "igenius", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "institute-of-science-tokyo/llama-3.1-swallow-70b-instruct-v0.1", "name": "institute-of-science-tokyo/llama-3.1-swallow-70b-instruct-v0.1", "owned_by": "institute-of-science-tokyo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "institute-of-science-tokyo/llama-3.1-swallow-8b-instruct-v0.1", "name": "institute-of-science-tokyo/llama-3.1-swallow-8b-instruct-v0.1", "owned_by": "institute-of-science-tokyo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "marin/marin-8b-instruct", "name": "marin/marin-8b-instruct", "owned_by": "marin", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mediatek/breeze-7b-instruct", "name": "mediatek/breeze-7b-instruct", "owned_by": "mediatek", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/codellama-70b", "name": "meta/codellama-70b", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.1-70b-instruct", "name": "meta/llama-3.1-70b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.1-8b-instruct", "name": "meta/llama-3.1-8b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.2-11b-vision-instruct", "name": "meta/llama-3.2-11b-vision-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.2-1b-instruct", "name": "meta/llama-3.2-1b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 
} }, { "id": "meta/llama-3.2-3b-instruct", "name": "meta/llama-3.2-3b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.2-90b-vision-instruct", "name": "meta/llama-3.2-90b-vision-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-3.3-70b-instruct", "name": "meta/llama-3.3-70b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-4-maverick-17b-128e-instruct", "name": "meta/llama-4-maverick-17b-128e-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-4-scout-17b-16e-instruct", "name": "meta/llama-4-scout-17b-16e-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama-guard-4-12b", "name": "meta/llama-guard-4-12b", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama2-70b", "name": "meta/llama2-70b", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama3-70b-instruct", "name": "meta/llama3-70b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta/llama3-8b-instruct", "name": "meta/llama3-8b-instruct", "owned_by": "meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/kosmos-2", "name": "microsoft/kosmos-2", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3-medium-4k-instruct", "name": "microsoft/phi-3-medium-4k-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3-mini-4k-instruct", "name": "microsoft/phi-3-mini-4k-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3-small-128k-instruct", "name": "microsoft/phi-3-small-128k-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3-small-8k-instruct", "name": "microsoft/phi-3-small-8k-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { 
"source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3-vision-128k-instruct", "name": "microsoft/phi-3-vision-128k-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3.5-mini-instruct", "name": "microsoft/phi-3.5-mini-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3.5-moe-instruct", "name": "microsoft/phi-3.5-moe-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-3.5-vision-instruct", "name": "microsoft/phi-3.5-vision-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-4-mini-flash-reasoning", "name": "microsoft/phi-4-mini-flash-reasoning", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "microsoft/phi-4-mini-instruct", "name": "microsoft/phi-4-mini-instruct", "owned_by": "microsoft", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "minimaxai/minimax-m2", "name": "minimaxai/minimax-m2", "owned_by": "minimaxai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/codestral-22b-instruct-v0.1", "name": "mistralai/codestral-22b-instruct-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mamba-codestral-7b-v0.1", "name": "mistralai/mamba-codestral-7b-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mathstral-7b-v0.1", "name": "mistralai/mathstral-7b-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/ministral-14b-instruct-2512", "name": "mistralai/ministral-14b-instruct-2512", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mistral-large-2-instruct", "name": "mistralai/mistral-large-2-instruct", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mistral-large-3-675b-instruct-2512", "name": "mistralai/mistral-large-3-675b-instruct-2512", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 
735790403 } }, { "id": "mistralai/mistral-medium-3-instruct", "name": "mistralai/mistral-medium-3-instruct", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mistral-nemotron", "name": "mistralai/mistral-nemotron", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mistral-small-24b-instruct", "name": "mistralai/mistral-small-24b-instruct", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mistral-small-3.1-24b-instruct-2503", "name": "mistralai/mistral-small-3.1-24b-instruct-2503", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mixtral-8x22b-instruct-v0.1", "name": "mistralai/mixtral-8x22b-instruct-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mixtral-8x22b-v0.1", "name": "mistralai/mixtral-8x22b-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "mistralai/mixtral-8x7b-instruct-v0.1", "name": "mistralai/mixtral-8x7b-instruct-v0.1", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "moonshotai/kimi-k2-instruct-0905", "name": "moonshotai/kimi-k2-instruct-0905", "owned_by": "moonshotai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nv-mistralai/mistral-nemo-12b-instruct", "name": "nv-mistralai/mistral-nemo-12b-instruct", "owned_by": "nv-mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/embed-qa-4", "name": "nvidia/embed-qa-4", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemoguard-8b-content-safety", "name": "nvidia/llama-3.1-nemoguard-8b-content-safety", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemoguard-8b-topic-control", "name": "nvidia/llama-3.1-nemoguard-8b-topic-control", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemotron-51b-instruct", "name": "nvidia/llama-3.1-nemotron-51b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 
735790403 } }, { "id": "nvidia/llama-3.1-nemotron-70b-reward", "name": "nvidia/llama-3.1-nemotron-70b-reward", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemotron-nano-4b-v1.1", "name": "nvidia/llama-3.1-nemotron-nano-4b-v1.1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemotron-nano-8b-v1", "name": "nvidia/llama-3.1-nemotron-nano-8b-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1", "name": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.1-nemotron-safety-guard-8b-v3", "name": "nvidia/llama-3.1-nemotron-safety-guard-8b-v3", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1", "name": "nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.2-nemoretriever-300m-embed-v1", "name": "nvidia/llama-3.2-nemoretriever-300m-embed-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.2-nemoretriever-300m-embed-v2", "name": "nvidia/llama-3.2-nemoretriever-300m-embed-v2", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.2-nv-embedqa-1b-v1", "name": "nvidia/llama-3.2-nv-embedqa-1b-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.2-nv-embedqa-1b-v2", "name": "nvidia/llama-3.2-nv-embedqa-1b-v2", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama-3.3-nemotron-super-49b-v1", "name": "nvidia/llama-3.3-nemotron-super-49b-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama3-chatqa-1.5-70b", "name": "nvidia/llama3-chatqa-1.5-70b", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/llama3-chatqa-1.5-8b", "name": "nvidia/llama3-chatqa-1.5-8b", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": 
"api", "tags": [], "created": 735790403 } }, { "id": "nvidia/mistral-nemo-minitron-8b-8k-instruct", "name": "nvidia/mistral-nemo-minitron-8b-8k-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/mistral-nemo-minitron-8b-base", "name": "nvidia/mistral-nemo-minitron-8b-base", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemoretriever-parse", "name": "nvidia/nemoretriever-parse", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemotron-4-340b-instruct", "name": "nvidia/nemotron-4-340b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemotron-4-340b-reward", "name": "nvidia/nemotron-4-340b-reward", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemotron-4-mini-hindi-4b-instruct", "name": "nvidia/nemotron-4-mini-hindi-4b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemotron-mini-4b-instruct", "name": "nvidia/nemotron-mini-4b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nemotron-parse", "name": "nvidia/nemotron-parse", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/neva-22b", "name": "nvidia/neva-22b", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nv-embed-v1", "name": "nvidia/nv-embed-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nv-embedcode-7b-v1", "name": "nvidia/nv-embedcode-7b-v1", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nv-embedqa-e5-v5", "name": "nvidia/nv-embedqa-e5-v5", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nv-embedqa-mistral-7b-v2", "name": "nvidia/nv-embedqa-mistral-7b-v2", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nvclip", "name": "nvidia/nvclip", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 
0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/nvidia-nemotron-nano-9b-v2", "name": "nvidia/nvidia-nemotron-nano-9b-v2", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/riva-translate-4b-instruct", "name": "nvidia/riva-translate-4b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/streampetr", "name": "nvidia/streampetr", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/usdcode-llama-3.1-70b-instruct", "name": "nvidia/usdcode-llama-3.1-70b-instruct", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "nvidia/vila", "name": "nvidia/vila", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "opengpt-x/teuken-7b-instruct-commercial-v0.4", "name": "opengpt-x/teuken-7b-instruct-commercial-v0.4", "owned_by": "opengpt-x", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "rakuten/rakutenai-7b-chat", "name": "rakuten/rakutenai-7b-chat", "owned_by": "rakuten", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "rakuten/rakutenai-7b-instruct", "name": "rakuten/rakutenai-7b-instruct", "owned_by": "rakuten", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "sarvamai/sarvam-m", "name": "sarvamai/sarvam-m", "owned_by": "sarvamai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "snowflake/arctic-embed-l", "name": "snowflake/arctic-embed-l", "owned_by": "snowflake", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "speakleash/bielik-11b-v2.3-instruct", "name": "speakleash/bielik-11b-v2.3-instruct", "owned_by": "speakleash", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "speakleash/bielik-11b-v2.6-instruct", "name": "speakleash/bielik-11b-v2.6-instruct", "owned_by": "speakleash", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "stockmark/stockmark-2-100b-instruct", "name": "stockmark/stockmark-2-100b-instruct", "owned_by": "stockmark", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "thudm/chatglm3-6b", "name": "thudm/chatglm3-6b", "owned_by": "thudm", "input_modalities": 
[ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "tiiuae/falcon3-7b-instruct", "name": "tiiuae/falcon3-7b-instruct", "owned_by": "tiiuae", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "tokyotech-llm/llama-3-swallow-70b-instruct-v0.1", "name": "tokyotech-llm/llama-3-swallow-70b-instruct-v0.1", "owned_by": "tokyotech-llm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "upstage/solar-10.7b-instruct", "name": "upstage/solar-10.7b-instruct", "owned_by": "upstage", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "utter-project/eurollm-9b-instruct", "name": "utter-project/eurollm-9b-instruct", "owned_by": "utter-project", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "writer/palmyra-creative-122b", "name": "writer/palmyra-creative-122b", "owned_by": "writer", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "writer/palmyra-fin-70b-32k", "name": "writer/palmyra-fin-70b-32k", "owned_by": "writer", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "writer/palmyra-med-70b", "name": "writer/palmyra-med-70b", "owned_by": "writer", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "writer/palmyra-med-70b-32k", "name": "writer/palmyra-med-70b-32k", "owned_by": "writer", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "yentinglin/llama-3-taiwan-70b-instruct", "name": "yentinglin/llama-3-taiwan-70b-instruct", "owned_by": "yentinglin", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "zyphra/zamba2-7b-instruct", "name": "zyphra/zamba2-7b-instruct", "owned_by": "zyphra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "deepseek-ai/deepseek-v3.2", "name": "deepseek-ai/deepseek-v3.2", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764927217 } }, { "id": "deepseek-ai/deepseek-v3.2-exp", "name": "deepseek-ai/deepseek-v3.2-exp", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760193778 } }, { "id": "llm-research/c4ai-command-r-plus-08-2024", "name": "llm-research/c4ai-command-r-plus-08-2024", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1725120000 } 
}, { "id": "llm-research/llama-4-maverick-17b-128e-instruct", "name": "llm-research/llama-4-maverick-17b-128e-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1732517497 } }, { "id": "menlo/jan-nano", "name": "menlo/jan-nano", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "minimax/minimax-m1-80k", "name": "minimax/minimax-m1-80k", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "mistralai/ministral-8b-instruct-2410", "name": "mistralai/ministral-8b-instruct-2410", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1727712000 } }, { "id": "mistralai/mistral-large-instruct-2407", "name": "mistralai/mistral-large-instruct-2407", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1719763200 } }, { "id": "mistralai/mistral-small-instruct-2409", "name": "mistralai/mistral-small-instruct-2409", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1725120000 } }, { "id": "musepublic/qwen-image-edit", "name": "musepublic/qwen-image-edit", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760111983 } }, { "id": "opencompass/compassjudger-1-32b-instruct", "name": "opencompass/compassjudger-1-32b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733414400 } }, { "id": "opengvlab/internvl3_5-241b-a28b", "name": "opengvlab/internvl3_5-241b-a28b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759136265 } }, { "id": "paddlepaddle/ernie-4.5-0.3b-pt", "name": "paddlepaddle/ernie-4.5-0.3b-pt", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "paddlepaddle/ernie-4.5-21b-a3b-pt", "name": "paddlepaddle/ernie-4.5-21b-a3b-pt", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "paddlepaddle/ernie-4.5-300b-a47b-pt", "name": "paddlepaddle/ernie-4.5-300b-a47b-pt", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "paddlepaddle/ernie-4.5-vl-28b-a3b-pt", "name": "paddlepaddle/ernie-4.5-vl-28b-a3b-pt", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751212800 } }, { "id": "qwen/qwen-image-edit", "name": "qwen/qwen-image-edit", "owned_by": 
"system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760111983 } }, { "id": "qwen/qwen2.5-14b-instruct", "name": "qwen/qwen2.5-14b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737907200 } }, { "id": "qwen/qwen2.5-14b-instruct-1m", "name": "qwen/qwen2.5-14b-instruct-1m", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737907200 } }, { "id": "qwen/qwen2.5-7b-instruct-1m", "name": "qwen/qwen2.5-7b-instruct-1m", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737907200 } }, { "id": "qwen/qwen2.5-coder-14b-instruct", "name": "qwen/qwen2.5-coder-14b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1731340800 } }, { "id": "qwen/qwen2.5-vl-3b-instruct", "name": "qwen/qwen2.5-vl-3b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737907200 } }, { "id": "qwen/qwen2.5-vl-7b-instruct", "name": "qwen/qwen2.5-vl-7b-instruct", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737907200 } }, { "id": "qwen/qwen3-0.6b", "name": "qwen/qwen3-0.6b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1745856000 } }, { "id": "qwen/qwen3-1.7b", "name": "qwen/qwen3-1.7b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1745856000 } }, { "id": "qwen/qwen3-4b", "name": "qwen/qwen3-4b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1745856000 } }, { "id": "shanghai_ai_laboratory/intern-s1", "name": "shanghai_ai_laboratory/intern-s1", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759136265 } }, { "id": "shanghai_ai_laboratory/intern-s1-mini", "name": "shanghai_ai_laboratory/intern-s1-mini", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759136265 } }, { "id": "xgenerationlab/xiyansql-qwencoder-32b-2412", "name": "xgenerationlab/xiyansql-qwencoder-32b-2412", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1732517497 } }, { "id": "xgenerationlab/xiyansql-qwencoder-32b-2504", "name": "xgenerationlab/xiyansql-qwencoder-32b-2504", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1732517497 } }, { "id": "assistant", "name": 
"assistant", "description": "General-purpose assistant. Write, code, ask for real-time information, create images, and more.\n\nQueries are automatically routed based on the task and subscription status.\n\nFor subscribers:\n- General queries: @GPT-5.1-Instant\n- Web searches: @Web-Search\n- Image generation: @Nano-Banana\n- Video-input tasks: @Gemini-2.5-Pro\n\nFor non-subscribers:\n- General queries: @GPT-4o-Mini\n- Web searches: @Web-Search\n- Image generation: @FLUX-schnell\n- Video-input tasks: @Gemini-2.5-Flash", "owned_by": "Poe", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 12288, "metadata": { "source": "api", "tags": [], "created": 1694610718936 } }, { "id": "gemini-3-pro", "name": "gemini-3-pro", "description": "Gemini 3 Pro is a state-of-the-art model for math, coding, computer use, and long‑horizon agent tasks, delivering top benchmark results including 23.4% on MathArena Apex (up from 1.6%), SOTA on tau-bench, an Elo of 2,439 on LiveCodeBench Pro (vs. 2,234), 72.7% on ScreenSpot‑Pro (~2× the previous best), and a higher mean net worth on Vending‑Bench 2 ($5,478 vs. $3,838). It has a 1M input context window and a max output tokens of 64k.\n\nOptional Parameters:\nTo instruct the bot to use more thinking effort, select from \"Low\" or \"High\"\nTo enable web search and real-time information update, toggle \"enable web search\". This is disabled by default.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "pricing": { "input": { "per_million_tokens": 1.5999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 9.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1761164773371 } }, { "id": "gpt-5.1-instant", "name": "gpt-5.1-instant", "description": "OpenAI’s most flagship model optimized for conversational intelligence. It excels at natural dialogue, contextual memory, and adaptive tone, making it perfect for interactive agents, tutoring, and customer support. It balances speed, reliability, and empathy for seamless real‑time communication. Supports 128k tokens of input context.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 1.1, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1762990086156 } }, { "id": "kling-omni", "name": "kling-omni", "description": "Bot for Kling Omni Image-to-Video inference. Send one image for image-to-video generation and two images for first-to-last frame video generation. Set duration with `--duration`, to either 5 or 10 seconds.\n\nAccepted file type: jpeg, png, webp, heic, heif. \nThis bot does not accept video files.\n\nNote: Prompt is required after attaching images to generate video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764600004756 } }, { "id": "manus", "name": "manus", "description": "Manus is an autonomous AI agent that executes tasks. It can take a high-level prompt, break it into subtasks, interact with tools/APIs, and deliver end-to-end results (like reports, code, websites, images, and more) without you managing each step.\n\nNotes: \n- In Agent mode, responses may take several minutes to complete. 
\n- Sometimes, files that Manus has created are incorrectly uploaded to the Poe message. In such cases, please check the Manus chat for the file. \n\nParameter controls available:\n1. Task Mode\n- Default: '--task_mode adaptive' (smart routing: may choose Chat or Agent)\n- Conversational single turn:' --task_mode chat' (fixed price)\n- Autonomous multi-step: '--task_mode agent'\n2. Agent Profile\n- Default: '--agent_profile manus-1.5' (maximum quality/depth)\n- Lower usage: '--agent_profile manus-1.5-lite' (typically ~half the credits)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761671380766 } }, { "id": "zai-glm-4.6-cs", "name": "zai-glm-4.6-cs", "description": "World’s fastest inference for ZAI GLM 4.6 with Cerebras. ZAI GLM 4.6 is a high‑performance AI model designed for advanced reasoning, superior coding, and effective tool use. It supports structured outputs, parallel tool calling, and real‑time streaming responses. Optimized for agentic coding and automation tasks, the model delivers strong real‑world performance with a context window of up to 131K tokens and output up to 40K tokens.\nFor more information see: https://inference-docs.cerebras.ai/models/zai-glm-46\n\nContext Limit: 131k", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762822934670 } }, { "id": "grok-4.1-fast-reasoning", "name": "grok-4.1-fast-reasoning", "description": "Grok-4.1-Fast-Reasoning is a high-performance version of xAI’s Grok 4.1 Fast, the company’s best agentic tool‑calling model. It works great in real-world use cases like customer support, deep research, and advanced analytical reasoning. Equipped with 2M‑token context window, this model processes vast information seamlessly, delivering coherent, context‑aware, and deeply reasoned insights at exceptional speed.", "owned_by": "XAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "metadata": { "source": "api", "tags": [], "created": 1763585722548 } }, { "id": "nano-banana-pro", "name": "nano-banana-pro", "description": "Nano Banana Pro (Gemini 3 Pro Image Preview) can make detailed, context-rich visuals, precisely edit or restyle input images with exceptional fidelity, and even generate legible text in images in multiple languages.\n\nOptional parameters:\n`--aspect_ratio` (options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9): Aspect ratio of the output image\n`--web_search true` to enable web search and real-time information access, this is disabled by default.\n`--image_only` (defaults: False): Determines whether to only generate image output\n`--image_size` (options: 1K, 2K, 4K): Resolution of image", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "pricing": { "input": { "per_million_tokens": 1.5999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 9.6, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1763518946353 } }, { "id": "nano-banana", "name": "nano-banana", "description": "Google DeepMind's Nano Banana (i.e. Gemini 2.5 Flash Image model) offers image generation and editing capabilities, state-of-the-art performance in photo-realistic multi-turn edits at exceptional speeds. 
Supports a maximum input context of 32k tokens.\n\nOptional parameters:\n--aspect_ratio (options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9): Aspect ratio of the output image\n--image_only (default: False): Determines whether to only generate image output", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 1.8, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1755817420757 } }, { "id": "grok-4.1-fast-non-reasoning", "name": "grok-4.1-fast-non-reasoning", "description": "Grok-4.1-Fast-Non-Reasoning is a streamlined companion to Grok 4.1 Fast, xAI’s best agentic tool‑calling model. It has a 2M context window and high responsiveness but is optimized for non‑reasoning tasks — excelling at text generation, summarization, and automated workflows that demand speed and efficiency over deep logic. Ideal for high-throughput use cases like customer support automation, bulk content creation, and fast conversational responses.", "owned_by": "XAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000000, "metadata": { "source": "api", "tags": [], "created": 1763585662409 } }, { "id": "qwen-3-next-80b-think", "name": "qwen-3-next-80b-think", "description": "The Qwen3-Next-80B-Think (with thinking mode enabled by default) is the next-generation foundation model released by Qwen, optimized for extreme context length and large-scale parameter efficiency, also known as \"Qwen3-Next-80B-A3B-Thinking.\" Despite its ultra-efficiency, it outperforms Qwen3-32B on downstream tasks - while requiring less than 1/10 of the inference cost. Moreover, it delivers over 10x higher inference throughput than Qwen3-32B when handling contexts longer than 32k tokens. This is the thinking version of https://poe.com/Qwen3-Next-80B; supports 65k tokens of context. \n\nOptional Parameters:\nUse the additional input beside the attachment button to manage the optional parameters:\n1. Enable/Disable Thinking - This will cause the model to think about the response before giving a final answer.\n\nTechnical Specifications:\nFile Support: PDF, DOC and XLSX files\nFile Attachment Limitation: Audio, video and image files\nContext Window: 65k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757556610505 } }, { "id": "qwen3-next-80b", "name": "qwen3-next-80b", "description": "The Qwen3-Next-80B is the next-generation foundation model released by Qwen, optimized for extreme context length and large-scale parameter efficiency, also known as \"Qwen3-Next-80B-A3B.\" Despite its ultra-efficiency, it outperforms Qwen3-32B on downstream tasks - while requiring less than 1/10 of the training cost.\nMoreover, it delivers over 10x higher inference throughput than Qwen3-32B when handling contexts longer than 32k tokens. 
\nUse `--enable_thinking false` to disable thinking mode before giving an answer.\nThis is the non-thinking version of https://poe.com/Qwen3-Next-80B-Think; supports 65k tokens of context.", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757556042820 } }, { "id": "nova-pro-1.0", "name": "nova-pro-1.0", "description": "Amazon Nova Pro 1.0 is a highly capable multimodal foundation model from Amazon Nova, offering a strong balance of accuracy, speed, and cost for processing text, images, and video. Its context window is 300,000 tokens, which enables handling very large inputs (including up to ~30 minutes of video input) in a single request.\n\nUse ‘--enable_latency_optimized [false/true]’ (default false) to disable/enable latency-optimized inference accordingly. Note that if enabled, costs may increase. Check the rate card for more information.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733715164341 } }, { "id": "nova-premier-1.0", "name": "nova-premier-1.0", "description": "The Amazon Nova Premier 1.0 model is Amazon’s most capable foundation model, able to handle extremely long contexts (≈ 1 million tokens) and multimodal inputs like text, images, and video while excelling at complex, multi‑step tasks across tools and data sources. \n\nIt supports chain‑of‑thought style reasoning and breaks down problems into intermediate steps before arriving at an answer, improving coherence and accuracy.\n\nUse '--enable_thinking [true/false]' (default true) to enable/disable thinking accordingly.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757959733022 } }, { "id": "nova-micro-1.0", "name": "nova-micro-1.0", "description": "Amazon Nova Micro is a text-only foundation model in the Amazon Nova family, designed for ultra‑low latency and very low cost, optimized for tasks like summarization, translation, and interactive chat. It supports a context window of 128,000 tokens, enabling handling of large text inputs in a single request.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733714662051 } }, { "id": "nova-lite-1.0", "name": "nova-lite-1.0", "description": "Amazon Nova Lite is a low‑cost multimodal foundation model from Amazon that can process text, images, and video and is optimized for speed and affordability. It offers a context window of 300,000 tokens, allowing handling of very large inputs in a single request (including up to ~30 minutes of video).", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733713614756 } }, { "id": "kling-image-o1", "name": "kling-image-o1", "description": "Kling Image O1 image generation and image editing bot. Send up to 10 images to use as a reference, and refer to each image with $image1, $image2, etc. in the prompt to specify interactions. Set resolution with `--resolution` and aspect ratio with `--aspect`. Note: `auto` aspect ratio is the default and can be used only for editing; text-to-image generation has a default of `1:1`. 
Supports jpeg, png, webp, heif/heic images.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764675160909 } }, { "id": "kling-2.6-pro", "name": "kling-2.6-pro", "description": "Generate high-quality videos with native audio from text and images using Kling 2.6 Pro. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Use `--aspect` to set the aspect ratio (one of `16:9`, `9:16`, and `1:1`; only works for text-to-video). Use `--duration` to set a 5- or 10-second video. Use `--silent` to generate a silent video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764786565451 } }, { "id": "flux-2-dev", "name": "flux-2-dev", "description": "Open-weight image generation (32B) model, derived from the FLUX.2 base model. The most powerful open-weight image generation and editing model available today, combining text-to-image synthesis and image editing with multiple input images in a single checkpoint.\n\nOptional parameters:\n`--aspect` to set aspect ratio: 16:9, 4:3, 1:1, 3:4, 9:16", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764084262252 } }, { "id": "gpt-oss-120b-t", "name": "gpt-oss-120b-t", "description": "OpenAI's GPT-OSS-120B delivers sophisticated chain-of-thought reasoning capabilities in a fully open model. Built with community feedback and released under Apache 2.0, this 120B parameter model provides transparency, customization, and deployment flexibility for organizations requiring complete data security & privacy control.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1754415494029 } }, { "id": "gpt-oss-20b-t", "name": "gpt-oss-20b-t", "description": "OpenAI's GPT-OSS-20B provides powerful chain-of-thought reasoning in an efficient 20B parameter model. Designed for single-GPU deployment while maintaining sophisticated reasoning capabilities, this Apache 2.0 licensed model offers the perfect balance of performance and resource efficiency for diverse applications.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754495737130 } }, { "id": "amazon-nova-reel-1.1", "name": "amazon-nova-reel-1.1", "description": "Amazon Nova Reel 1.1 is an advanced AI video generation model that creates up to 2-minute multi-shot videos from text and optional image prompts, offering improved video quality, latency, and visual consistency compared to its predecessor.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757629656513 } }, { "id": "kimi-k2-think-t", "name": "kimi-k2-think-t", "description": "Kimi K2 Thinking is Moonshot AI's most capable open-source thinking model, built as a thinking agent that reasons step-by-step while dynamically invoking tools. 
Setting new state-of-the-art records on Humanity's Last Exam (HLE), BrowseComp, and other benchmarks, K2 Thinking dramatically scales multi-step reasoning depth while maintaining stable tool-use across 200–300 sequential calls — a breakthrough in long-horizon agency with native INT4 quantization for 2x inference speed.\n\nSupported File Types: JPEG, PNG, PDF", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762824745719 } }, { "id": "amazon-nova-canvas", "name": "amazon-nova-canvas", "description": "Amazon Nova Canvas is a high-quality image-generation model that creates and edits images from text or image inputs—offering features like inpainting/outpainting, virtual try‑on, style controls, and background removal—all with built‑in customization.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757741443323 } }, { "id": "kimi-k2-0905-t", "name": "kimi-k2-0905-t", "description": "The new Kimi K2-0905 model from Moonshot AI features a massive 256,000-token context window, double the length of its predecessor (Kimi K2), along with greatly improved coding abilities and front-end generation accuracy. It boasts 1 trillion total parameters (with 32 billion activated at a time) and claims 100% tool-call success in real-world tests, setting a new bar for open-source AI performance in complex, multi-step tasks.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757044663632 } }, { "id": "kimi-k2-t", "name": "kimi-k2-t", "description": "Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752510412371 } }, { "id": "kimi-k2-instruct", "name": "kimi-k2-instruct", "description": "Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities. Uses the latest September 5th, 2025 snapshot. 
The updated version has improved coding abilities, agentic tool use, and a longer (256K) context window.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752519798608 } }, { "id": "glm-4.6-fw", "name": "glm-4.6-fw", "description": "As the latest iteration in the GLM series, GLM-4.6 achieves comprehensive enhancements across multiple domains, including real-world coding, long-context processing, reasoning, searching, writing, and agentic applications.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761106926969 } }, { "id": "deepseek-v3.1-t", "name": "deepseek-v3.1-t", "description": "DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode. Compared to the previous version, this upgrade brings improvements in multiple aspects: Hybrid thinking mode: One model supports both thinking mode and non-thinking mode by changing the chat template. Smarter tool calling: Through post-training optimization, the model's performance in tool usage and agent tasks has significantly improved. Higher thinking efficiency: DeepSeek-V3.1-Think achieves comparable answer quality to DeepSeek-R1-0528, while responding more quickly.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756261097964 } }, { "id": "deepseek-v3.1-n", "name": "deepseek-v3.1-n", "description": "DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode. Compared to the previous version, this upgrade brings improvements in multiple aspects:\n\n- Hybrid thinking mode: One model supports both thinking mode and non-thinking mode by changing the chat template.\n- Smarter tool calling: Through post-training optimization, the model's performance in tool usage and agent tasks has significantly improved.\n- Higher thinking efficiency: DeepSeek-V3.1-Think achieves comparable answer quality to DeepSeek-R1-0528, while responding more quickly.\n\nTechnical Specifications\n\nFile Support: Attachments not supported\nContext window: 128k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755623272928 } }, { "id": "claude-opus-4-reasoning", "name": "claude-opus-4-reasoning", "description": "Claude Opus 4 from Anthropic, supports customizable thinking budget (up to 30k tokens) and 200k context window.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 30,768 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 196608, "pricing": { "input": { "per_million_tokens": 13, "currency": "USD" }, "output": { "per_million_tokens": 64, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747865908863 } }, { "id": "claude-sonnet-4-reasoning", "name": "claude-sonnet-4-reasoning", "description": "Claude Sonnet 4 from Anthropic, supports customizable thinking budget (up to 60k tokens) and 200k context window.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 61,440 to the end of your message.", "owned_by": "Anthropic", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 983040, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747865657124 } }, { "id": "glm-4.5-air-t", "name": "glm-4.5-air-t", "description": "The GLM-4.5 series models are foundation models designed for intelligent agents. GLM-4.5 has 355 billion total parameters with 32 billion active parameters, while GLM-4.5-Air adopts a more compact design with 106 billion total parameters and 12 billion active parameters. GLM-4.5 models unify reasoning, coding, and intelligent agent capabilities to meet the complex demands of intelligent agent applications.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754691854718 } }, { "id": "glm-4.5-fw", "name": "glm-4.5-fw", "description": "The GLM-4.5 series models are foundation models designed for intelligent agents. GLM-4.5 has 355 billion total parameters with 32 billion active parameters. It unifies reasoning, coding, and intelligent agent capabilities to meet the complex demands of intelligent agent applications.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753915796429 } }, { "id": "elevenlabs-v3", "name": "elevenlabs-v3", "description": "ElevenLabs v3 is a cutting-edge text-to-speech model that brings scripts to life with remarkable realism and performance-level control. Unlike traditional TTS systems, it allows creators to shape the emotional tone, pacing, and soundscape of their audio through the use of inline audio tags. These tags are enclosed in square brackets and act as stage directions—guiding how a line is spoken or what sound effects are inserted—without being spoken aloud. This enables rich, expressive narration and dialogue for applications like audiobooks, games, podcasts, and interactive media. Whether you’re aiming for a tense whisper, a sarcastic remark, or a dramatic soundscape full of explosions and ambient effects, v3 gives you granular control directly in the text prompt. This bot will also run text-to-speech on PDF attachments / URL links.\n\nExamples of voice delivery tags include:\n* [whispers] I have to tell you a secret. \n* [angry] That was *never* the plan.\n* [sarcastic] Oh, sure. That’ll totally work.\n* and [laughs] You're hilarious.\n\nExamples of sound effect tags are:\n* [gunshot] Get down!\n* [applause] Thank you, everyone.\n* and [explosion] What was that?!\n\nThese can also be combined.\n\nMultiple speakers can be supported via the parameter control. Dialogue for multiple speakers must follow the format, e.g. 
for 3 speakers:\n\nSpeaker 1: [dialogue]\nSpeaker 2: [dialogue]\nSpeaker 3: [dialogue]\nSpeaker 1: [dialogue]\nSpeaker 2: [dialogue]\n--speaker_count 3 --voice_1 [voice_1] --voice_2 [voice_2] --voice_3 [voice_3]\n\nThe following voices are supported:\nAlexandra - Conversational & Real\nAmy - Young & Natural\nArabella - Mature Female Narrator\nAustin - Good Ol' Texas Boy\nBlondie - Warm & Conversational\nBradford - British Male Storyteller\nCallum - Gravelly Yet Unsettling\nCharlotte - Raspy & Sensual\nChris - Down-to-Earth\nCoco Li - Shanghainese Female\nGaming - Unreal Tonemanagement 2003\nHarry - Animated Warrior\nHayato - Soothing Zen Male\nHope - Upbeat & Clear\nJames - Husky & Engaging\nJames Gao - Calm Chinese Voice\nJane - Professional Audiobook Reader\nJessica - Playful American Female\nJuniper - Grounded Female Professional\nKaro Yang - Youthful Asian Male\nKuon - Acute Fantastic Female\nLaura - Quirky Female Voice\nLiam - Warm, Energetic Youth\nMonika Sogam - Indian-English Accent\nNichalia Schwartz - Engaging Female American\nPriyanka Sogam - Late-Night Radio\nReginald - Brooding, Intense Villain\nShanShan - Young, Energetic Female\nXiao Bai - Shrill & Annoying\n\nPrompt input cannot exceed 5,000 characters.", "owned_by": "ElevenLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1749151405074 } }, { "id": "deepseek-v3-fw", "name": "deepseek-v3-fw", "description": "DeepSeek-V3 is an open-source Mixture-of-Experts (MoE) language model, able to perform well on competitive benchmarks with cost-effective training & inference. All data submitted to this bot is governed by the Poe privacy policy and is sent to Fireworks, a US-based company. Supports a 131k context window and max output of 131k tokens. Updated to serve the latest March 24th, 2025 snapshot.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 160000, "metadata": { "source": "api", "tags": [], "created": 1735687236887 } }, { "id": "deepseek-v3.1-tm", "name": "deepseek-v3.1-tm", "description": "DeepSeek-V3.1-Terminus preserves all original model capabilities while resolving key user-reported issues, including:\n- Language consistency: Significantly reducing mixed Chinese-English output and eliminating abnormal character occurrences\n- Agent performance: Enhanced optimization of both Code Agent and Search Agent functionality\n- Use `--enable_thinking false` to disable thinking about the response before giving a final answer.\n- The bot does not accept attachments. It also does not support billing logic.\n\nContext window: 128k tokens.", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758553230099 } }, { "id": "llama-4-scout-t", "name": "llama-4-scout-t", "description": "Llama 4 Scout, a fast long-context multimodal model from Meta. A 16-expert MoE model that excels at multi-document analysis, codebase reasoning, and personalized tasks. A smaller model than Maverick but state of the art in its size & with text + image input support. 
Supports 300k context.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 300000, "metadata": { "source": "api", "tags": [], "created": 1743891662563 } }, { "id": "claude-opus-4-search", "name": "claude-opus-4-search", "description": "Claude Opus 4 with access to real-time information from the web. Supports a customizable thinking budget of up to 126k tokens.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 126,000 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 196608, "pricing": { "input": { "per_million_tokens": 13, "currency": "USD" }, "output": { "per_million_tokens": 64, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750451340055 } }, { "id": "claude-sonnet-4-search", "name": "claude-sonnet-4-search", "description": "Claude Sonnet 4 with access to real-time information from the web. Supports a customizable thinking budget of up to 126k tokens.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 126,000 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 983040, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1750451236340 } }, { "id": "claude-sonnet-3.7", "name": "claude-sonnet-3.7", "description": "Claude Sonnet 3.7 is a hybrid reasoning model, producing near-instant responses or extended, step-by-step thinking. For maximum extended thinking, please use https://poe.com/Claude-Sonnet-Reasoning-3.7. Supports a 200k token context window.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 16,384 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 196608, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1739926818142 } }, { "id": "claude-sonnet-3.5", "name": "claude-sonnet-3.5", "description": "Anthropic's Claude Sonnet 3.5 using the October 22, 2024 model snapshot. Excels in complex tasks like coding, writing, analysis and visual processing. Has a context window of 200k tokens (approximately 150k English words).", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1717554300318 } }, { "id": "claude-haiku-3.5", "name": "claude-haiku-3.5", "description": "The latest generation of Anthropic's fastest model. Claude Haiku 3.5 has fast speeds and improved instruction following. 
", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 0.6799999999999999, "currency": "USD" }, "output": { "per_million_tokens": 3.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1727818578813 } }, { "id": "claude-sonnet-3.7-search", "name": "claude-sonnet-3.7-search", "description": "Claude Sonnet 3.7 with access to real-time information from the web.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 126,000 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 196608, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747285973996 } }, { "id": "claude-haiku-3.5-search", "name": "claude-haiku-3.5-search", "description": "Claude Haiku 3.5 with access to real-time information from the web.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 0.6799999999999999, "currency": "USD" }, "output": { "per_million_tokens": 3.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1747285932473 } }, { "id": "gpt-oss-120b-cs", "name": "gpt-oss-120b-cs", "description": "World’s fastest inference for GPT OSS 120B with Cerebras. OpenAI's GPT-OSS-120B delivers sophisticated chain-of-thought reasoning capabilities in a fully open model. The bot does not accept video, ppt, docx and excel files.", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754490145525 } }, { "id": "openai-gpt-oss-120b", "name": "openai-gpt-oss-120b", "description": "GPT-OSS-120b is a high-performance, open-weight language model designed for production-grade, general-purpose use cases. It fits on a single H100 GPU, making it accessible without requiring multi-GPU infrastructure. Trained on the Harmony response format, it excels at complex reasoning and supports configurable reasoning effort, full chain-of-thought transparency for easier debugging and trust, and native agentic capabilities for function calling, tool use, and structured outputs.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1754416223840 } }, { "id": "openai-gpt-oss-20b", "name": "openai-gpt-oss-20b", "description": "GPT-OSS-20B is a compact, open-weight language model optimized for low-latency and resource-constrained environments, including local and edge deployments. 
It shares the same Harmony training foundation and capabilities as 120B, with faster inference and easier deployment that is ideal for specialized or offline use cases, fast responsive performance, chain-of-thought output, and agentic workflows.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1754418551040 } }, { "id": "qwen3-next-instruct-t", "name": "qwen3-next-instruct-t", "description": "Qwen3-Next Instruct features a highly sparse MoE structure that activates only 3B of its 80B parameters during inference. Supports only instruct mode without thinking blocks, delivering performance on par with Qwen3-235B-A22B-Instruct-2507 on certain benchmarks while using less than 10% training cost and providing 10x+ higher throughput on contexts over 32K tokens.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759346886115 } }, { "id": "qwen3-next-think-t", "name": "qwen3-next-think-t", "description": "Qwen3-Next Thinking features the same highly sparse MoE architecture but specialized for complex reasoning tasks. Supports only thinking mode with automatic tag inclusion, delivering exceptional analytical performance while maintaining extreme efficiency with 10x+ higher throughput on long contexts and may generate longer thinking content than predecessors.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759347481189 } }, { "id": "qwen3-max-n", "name": "qwen3-max-n", "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It delivers higher accuracy in math, coding, logic, and science tasks, follows complex instructions in Chinese and English more reliably, reduces hallucinations, and produces higher-quality responses for open-ended Q&A, writing, and conversation. The model supports over 100 languages with stronger translation and commonsense reasoning, and is optimized for retrieval-augmented generation (RAG) and tool calling, though it does not include a dedicated “thinking” mode.\n\nFile Support: Text, Markdown and PDF files\nContext window: 256k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762329665354 } }, { "id": "qwen3-vl-235b-a22b-t", "name": "qwen3-vl-235b-a22b-t", "description": "Qwen3-VL is the most advanced vision-language model in the Qwen series, offering enhanced text understanding, visual reasoning, spatial perception, and agent capabilities. 
It supports Dense/MoE architectures and Instruct/Thinking editions for versatile deployment.\n\nKey Features:\n- Visual Agent: Operates GUIs, recognizes elements, invokes tools, and completes tasks.\n- Coding Boost: Generates Draw.io, HTML, CSS, and JS from images/videos.\n- Spatial Perception: Enables 2D/3D reasoning with strong object positioning and occlusion analysis.\n- Long Context: Processes up to 1M tokens for books or long videos.\n- Multimodal Reasoning: Excels in STEM, math, causal analysis, and evidence-based answers.\n- Visual Recognition: Recognizes a wide range of objects, landmarks, and more.\n- OCR: Supports 32 languages with improved performance in challenging conditions.\n- Text-Vision Fusion: Achieves seamless, unified comprehension.\n\nIdeal for multimodal reasoning, spatial analysis, and integrated text-vision tasks.\n\nTechnical Specifications\n\nFile Support: Image, Video, PDF and Markdown files\nContext window: 128k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758695878297 } }, { "id": "qwen3-vl-235b-a22b-i", "name": "qwen3-vl-235b-a22b-i", "description": "This generation delivers comprehensive upgrades across the board: superior text understanding & generation, deeper visual perception & reasoning, extended context length, enhanced spatial and video dynamics comprehension, and stronger agent interaction capabilities.\n\nAvailable in Dense and MoE architectures that scale from edge to cloud, with Instruct and reasoning‑enhanced Thinking editions for flexible, on‑demand deployment.\n\nKey Enhancements:\nVisual Agent: Operates PC/mobile GUIs—recognizes elements, understands functions, invokes tools, completes tasks.\n\nVisual Coding Boost: Generates Draw.io/HTML/CSS/JS from images/videos.\n\nAdvanced Spatial Perception: Judges object positions, viewpoints, and occlusions; provides stronger 2D grounding and enables 3D grounding for spatial reasoning and embodied AI.\n\nLong Context & Video Understanding: Native 256K context, expandable to 1M; handles books and hours-long video with full recall and second-level indexing.\n\nEnhanced Multimodal Reasoning: Excels in STEM/Math—causal analysis and logical, evidence-based answers.\n\nUpgraded Visual Recognition: Broader, higher-quality pretraining is able to \"recognize everything\"—celebrities, anime, products, landmarks, flora/fauna, etc.\n\nExpanded OCR: Supports 32 languages (up from 19); robust in low light, blur, and tilt; better with rare/ancient characters and jargon; improved long-document structure parsing.\n\nText Understanding on par with pure LLMs: Seamless text–vision fusion for lossless, unified comprehension.\n\nTechnical Specifications\n\nFile Support: Image, Video, PDF and Markdown files\nContext window: 128k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758695977113 } }, { "id": "qwen-3-235b-2507-t", "name": "qwen-3-235b-2507-t", "description": "Qwen3 235B A22B 2507, currently the best instruct model (non-reasoning) among both closed and open source models. It excels in instruction following, logical reasoning, text comprehension, mathematics, science, coding and tool usage. 
It is also great at multilingual tasks and supports a long context window (262k).", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "api", "tags": [], "created": 1745978851479 } }, { "id": "qwen3-235b-2507-fw", "name": "qwen3-235b-2507-fw", "description": "State-of-the-art language model with exceptional math, coding, and problem-solving performance. Operates in non-thinking mode, and does not generate <think> blocks in its output. Supports 256k tokens of native context length. All data provided will not be used in training, and is sent only to Fireworks AI, a US-based company. Uses the latest July 21st, 2025 snapshot (Qwen3-235B-A22B-Instruct-2507).", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 100000, "metadata": { "source": "api", "tags": [], "created": 1745952547301 } }, { "id": "qwen3-235b-2507-cs", "name": "qwen3-235b-2507-cs", "description": "World's fastest inference with Qwen3 235B Instruct (2507) model with Cerebras. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage.", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754489704731 } }, { "id": "qwen3-coder-480b-t", "name": "qwen3-coder-480b-t", "description": "Qwen3‑Coder‑480B is a state‑of‑the‑art mixture‑of‑experts (MoE) code‑specialized language model with 480 billion total parameters and 35 billion activated parameters. Qwen3‑Coder delivers exceptional performance across code generation, function calling, tool use, and long‑context reasoning. It natively supports up to 262,144‑token context windows, making it ideal for large repository and multi‑file coding tasks.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753465729255 } }, { "id": "qwen3-coder-480b-n", "name": "qwen3-coder-480b-n", "description": "Qwen3-Coder-480B-A35B-Instruct delivers Claude Sonnet-comparable performance on agentic coding and browser tasks while supporting 256K-1M token long-context processing and multi-platform agentic coding capabilities. \n\nTechnical Specifications\n\nFile Support: Attachments not supported\nContext window: 256k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755222889121 } }, { "id": "qwen3-235b-a22b-di", "name": "qwen3-235b-a22b-di", "description": "Qwen3 is the latest generation of large language models in the Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support.\n\nSupports 32k tokens of input context and 8k tokens of output context. Quantization: FP8.", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "metadata": { "source": "api", "tags": [], "created": 1746004656402 } }, { "id": "qwen3-235b-a22b-n", "name": "qwen3-235b-a22b-n", "description": "It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. 
The model supports a native 262K context length and does not implement \"thinking mode\" (<think> blocks). The bot does not currently support attachments.\nIt features the following key enhancements:\n- Significant improvements in general capabilities, including instruction following, logical reasoning, text comprehension, mathematics, science, coding and tool usage.\n- Substantial gains in long-tail knowledge coverage across multiple languages.\n- Markedly better alignment with user preferences in subjective and open-ended tasks, enabling more helpful responses and higher-quality text generation.\n- Enhanced capabilities in 256K long-context understanding.\n\nTechnical Specifications\n\nFile Support: Attachments not supported\nContext window: 128k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754050170519 } }, { "id": "cartesia-ink-whisper", "name": "cartesia-ink-whisper", "description": "Transcribe audio files using Speech-to-Text with the Cartesia Ink Whisper model.\n\nSelect the Language (`--language`) of your audio file in Settings. Default is English (en).\n\nSupported Languages:\nEnglish (en)\nChinese (zh)\nGerman (de)\nSpanish (es)\nRussian (ru)\nKorean (ko)\nFrench (fr)\nJapanese (ja)\nPortuguese (pt)\nTurkish (tr)\nPolish (pl)\nCatalan (ca)\nDutch (nl)\nArabic (ar)\nSwedish (sv)\nItalian (it)\nIndonesian (id)\nHindi (hi)\nFinnish (fi)\nVietnamese (vi)\nHebrew (he)\nUkrainian (uk)\nGreek (el)\nMalay (ms)\nCzech (cs)\nRomanian (ro)\nDanish (da)\nHungarian (hu)\nTamil (ta)\nNorwegian (no)\nThai (th)\nUrdu (ur)\nCroatian (hr)\nBulgarian (bg)\nLithuanian (lt)\nLatin (la)\nMaori (mi)\nMalayalam (ml)\nWelsh (cy)\nSlovak (sk)\nTelugu (te)\nPersian (fa)\nLatvian (lv)\nBengali (bn)\nSerbian (sr)\nAzerbaijani (az)\nSlovenian (sl)\nKannada (kn)\nEstonian (et)\nMacedonian (mk)\nBreton (br)\nBasque (eu)\nIcelandic (is)\nArmenian (hy)\nNepali (ne)\nMongolian (mn)\nBosnian (bs)\nKazakh (kk)\nAlbanian (sq)\nSwahili (sw)\nGalician (gl)\nMarathi (mr)\nPunjabi (pa)\nSinhala (si)\nKhmer (km)\nShona (sn)\nYoruba (yo)\nSomali (so)\nAfrikaans (af)\nOccitan (oc)\nGeorgian (ka)\nBelarusian (be)\nTajik (tg)\nSindhi (sd)\nGujarati (gu)\nAmharic (am)\nYiddish (yi)\nLao (lo)\nUzbek (uz)\nFaroese (fo)\nHaitian Creole (ht)\nPashto (ps)\nTurkmen (tk)\nNynorsk (nn)\nMaltese (mt)\nSanskrit (sa)\nLuxembourgish (lb)\nMyanmar (my)\nTibetan (bo)\nTagalog (tl)\nMalagasy (mg)\nAssamese (as)\nTatar (tt)\nHawaiian (haw)\nLingala (ln)\nHausa (ha)\nBashkir (ba)\nJavanese (jw)\nSundanese (su)\nCantonese (yue)\n", "owned_by": "Cartesia AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757628728993 } }, { "id": "glm-4.6-t", "name": "glm-4.6-t", "description": "GLM-4.6 is the latest flagship model from Z.ai's GLM series, delivering state-of-the-art agentic and coding capabilities that rival Claude Sonnet 4. With 357B parameters in a Mixture-of-Experts architecture, an expanded 200K context window, and 30% improved token efficiency, GLM-4.6 represents the top-performing model developed in China.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762826676738 } }, { "id": "llama-3.1-8b-di", "name": "llama-3.1-8b-di", "description": "The smallest and fastest model from Meta's Llama 3.1 family. 
This open-source language model excels in multilingual dialogue, outperforming numerous industry benchmarks for both closed and open-source conversational AI systems. All data you submit to this bot is governed by the Poe privacy policy and is only sent to DeepInfra, a US-based company.\n\nInput token limit 128k, output token limit 8k. Quantization: FP16 (official).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1740488781419 } }, { "id": "claude-sonnet-3.7-reasoning", "name": "claude-sonnet-3.7-reasoning", "description": "Reasoning capabilities on by default. Claude Sonnet 3.7 is a hybrid reasoning model, producing near-instant responses or extended, step-by-step thinking. Recommended for complex math or coding problems. Supports a 200k token context window.\nTo instruct the bot to use more thinking effort, add --thinking_budget and a number ranging from 0 to 126,000 to the end of your message.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 196608, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1739926096905 } }, { "id": "inception-mercury", "name": "inception-mercury", "description": "Mercury is the first diffusion large language model (dLLM). On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. A new generation of LLMs that push the frontier of fast, high-quality text generation.", "owned_by": "Inception Labs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750952818304 } }, { "id": "inception-mercury-coder", "name": "inception-mercury-coder", "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder Small's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the blog post here: https://www.inceptionlabs.ai/introducing-mercury.", "owned_by": "Inception Labs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747072614396 } }, { "id": "mistral-medium", "name": "mistral-medium", "description": "Mistral AI's medium-sized model. 
Supports a context window of 32k tokens (around 24,000 words) and is stronger than Mixtral-8x7b and Mistral-7b on benchmarks across the board.", "owned_by": "Mistral", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.7, "currency": "USD" }, "output": { "per_million_tokens": 8.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1703096777397 } }, { "id": "kat-coder-pro-v1", "name": "kat-coder-pro-v1", "description": "KAT-Coder-Pro V1 possesses advanced intelligent agent capabilities such as multi-tool parallel invocation, enabling autonomous completion of complex tasks with fewer interactions. It features stronger code comprehension and logical reasoning, delivering ultimate performance for AI coding.\n\nFile Support: Text, Markdown and PDF files\nContext window: 256k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759294887897 } }, { "id": "llama-4-maverick-t", "name": "llama-4-maverick-t", "description": "Llama 4 Maverick, state of the art long-context multimodal model from Meta. A 128-expert MoE powerhouse for multilingual image/text understanding (12 languages), creative writing, and enterprise-scale applications—outperforming Llama 3.3 70B. Supports 500k tokens context.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 524288, "metadata": { "source": "api", "tags": [], "created": 1743883014548 } }, { "id": "llama-3.3-70b-fw", "name": "llama-3.3-70b-fw", "description": "Meta's Llama 3.3 70B Instruct, hosted by Fireworks AI. Llama 3.3 70B is a new open source model that delivers leading performance and quality across text-based use cases such as synthetic data generation at a fraction of the inference cost, improving over Llama 3.1 70B.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1733508651951 } }, { "id": "deepseek-r1-fw", "name": "deepseek-r1-fw", "description": "State-of-the-art large reasoning model delivering problem-solving, math, and coding performance at a fraction of the cost; explains its chain of thought. All data you provide this bot will not be used in training, and is sent only to Fireworks AI, a US-based company. Supports 164k tokens of input context and 164k tokens of output context. Uses the latest May 28th, 2025 snapshot.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 160000, "metadata": { "source": "api", "tags": [], "created": 1737499802568 } }, { "id": "deepseek-r1-di", "name": "deepseek-r1-di", "description": "Top open-source reasoning LLM rivaling OpenAI's o1 model; delivers top-tier performance across math, code, and reasoning tasks at a fraction of the cost. All data you provide this bot will not be used in training, and is sent only to DeepInfra, a US-based company.\n\nSupports 64k tokens of input context and 8k tokens of output context. 
Quantization: FP8 (official).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 64000, "metadata": { "source": "api", "tags": [], "created": 1740487208576 } }, { "id": "deepseek-r1-n", "name": "deepseek-r1-n", "description": "The DeepSeek-R1 model (latest snapshot DeepSeek-R1-0528) features enhanced reasoning and inference capabilities through optimized algorithms and increased computational resources. It excels in mathematics, programming, and logic, with performance nearing top-tier models like o3 and Gemini 2.5 Pro. This bot does not accept attachments. \n\nTechnical Specifications\n\nFile Support: Attachments not supported\nContext window: 160k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754049641148 } }, { "id": "llama-3.3-70b-n", "name": "llama-3.3-70b-n", "description": "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks. \n\nTechnical Specifications\n\nFile Support: Attachments not supported\nContext window: 128k tokens", "owned_by": "Novita AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754050595700 } }, { "id": "llama-3.3-70b-cs", "name": "llama-3.3-70b-cs", "description": "World’s fastest inference for Llama 3.3 70B with Cerebras. The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747179391092 } }, { "id": "llama-3.1-70b-t", "name": "llama-3.1-70b-t", "description": "Llama 3.1 70B Instruct from Meta. Supports 128k tokens of context.\n\nThe points price is subject to change.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1721748215163 } }, { "id": "llama-3.1-8b-cs", "name": "llama-3.1-8b-cs", "description": "World’s fastest inference for Llama 3.1 8B with Cerebras. This Llama 8B instruct-tuned version is fast and efficient. The Llama 3.1 8B is an instruction tuned text only model, optimized for multilingual dialogue use cases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747179273060 } }, { "id": "gpt-researcher", "name": "gpt-researcher", "description": "GPT Researcher is an agent that conducts deep research on any topic and generates a comprehensive report with citations. GPT Researcher is powered by Tavily's search engine.\n\nGPTR is based on the popular open source project: https://github.com/assafelovic/gpt-researcher -- by integrating Tavily search, it is optimized for curation and ranking of trusted research sources. 
Learn more at https://gptr.dev or https://tavily.com", "owned_by": "GPT Researcher", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1735901906014 } }, { "id": "web-search", "name": "web-search", "description": "Web-enabled assistant bot that searches the internet to inform its responses. Particularly good for queries regarding up-to-date information or specific facts. Powered by Gemini 2.0 Flash.", "owned_by": "Poe", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1694131444821 } }, { "id": "gpt-4o-search", "name": "gpt-4o-search", "description": "OpenAI's fine-tuned model for searching the web for real-time information. For less expensive messages, consider https://poe.com/GPT-4o-mini-Search. Uses medium search context size, currently in preview, supports 128k tokens of context. Does not support image search.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.2, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741720622451 } }, { "id": "gpt-4o-mini-search", "name": "gpt-4o-mini-search", "description": "OpenAI's fine-tuned model for searching the web for real-time information. For higher-performance, consider https://poe.com/GPT-4o-Search. Uses medium search context size, currently in preview, supports 128k tokens of context. Does not support image search.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.14, "currency": "USD" }, "output": { "per_million_tokens": 0.54, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1741724009166 } }, { "id": "reka-research", "name": "reka-research", "description": "Reka Research is a state-of-the-art agentic AI that answers complex questions by browsing the web. It excels at synthesizing information from multiple sources, performing in minutes work that usually takes hours.", "owned_by": "Reka AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750919363394 } }, { "id": "perplexity-sonar", "name": "perplexity-sonar", "description": "Sonar by Perplexity is a cutting-edge AI model that delivers real-time, web-connected search results with accurate citations. It's designed to provide up-to-date information and customizable search sources, making it a powerful tool for integrating AI search into various applications. Context Length: 127k", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 127000, "metadata": { "source": "api", "tags": [], "created": 1737790362317 } }, { "id": "linkup-deep-search", "name": "linkup-deep-search", "description": "Linkup Deep Search is an AI-powered search bot that continues to search iteratively if it hasn't found sufficient information on the first attempt. Results are slower compared to its Standard search counterpart, but are often more comprehensive.\nLinkup's technology ranks #1 globally for factual accuracy, achieving state-of-the-art scores on OpenAI’s SimpleQA benchmark. 
Context Window: 100k\nAudio/video files are not supported at this time. \nParameter controls available: \n1. Domain control. To search only within specific domains use --include_domains; to exclude domains from the search results use --exclude_domains; to give higher priority to certain domains use --prioritize_domains.\n2. Date Range: Use --from_date and --to_date to select a date range. Use the YYYY-MM-DD date format.\n3. Content Option: Use --include_image true to include relevant images in the results and --image_count (up to 45) to set the number of images to display.\nLearn more: https://www.linkup.so/", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755390159000 } }, { "id": "linkup-standard", "name": "linkup-standard", "description": "Linkup Standard is an AI-powered search bot that provides detailed overviews and answers sourced from the web, helping you find high-quality information quickly and accurately. Results are faster compared to its Deep search counterpart. Context Window: 100k\nLinkup's technology ranks #1 globally for factual accuracy, achieving state-of-the-art scores on OpenAI’s SimpleQA benchmark. Audio/video files are not supported at this time.\nParameter controls available: \n1. Domain control. To search only within specific domains use --include_domains; to exclude domains from the search results use --exclude_domains; to give higher priority to certain domains use --prioritize_domains.\n2. Date Range: Use --from_date and --to_date to select a date range. Use the YYYY-MM-DD date format.\n3. Content Option: Use --include_image true to include relevant images in the results and --image_count (up to 45) to set the number of images to display.\nLearn more: https://www.linkup.so/", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755298530796 } }, { "id": "perplexity-sonar-pro", "name": "perplexity-sonar-pro", "description": "Sonar Pro by Perplexity is an advanced AI model that enhances real-time, web-connected search capabilities with double the citations and a larger context window. It's designed for complex queries, providing in-depth, nuanced answers and extended extensibility, making it ideal for enterprises and developers needing robust search solutions. Context Length: 200k (max output token limit of 8k)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200000, "metadata": { "source": "api", "tags": [], "created": 1737790959209 } }, { "id": "perplexity-sonar-rsn-pro", "name": "perplexity-sonar-rsn-pro", "description": "This model operates on the open-sourced uncensored R1-1776 model from Perplexity with web search capabilities. The Perplexity Sonar Rsn Pro Reasoning Model takes AI-powered answers to the next level, offering unmatched quality and precision. Outperforming leading search engines and LLMs, this model has demonstrated superior performance in the SimpleQA benchmark, making it the gold standard for high-quality answer generation. 
Context Length: 128k (max output token limit of 8k)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1739997380566 } }, { "id": "perplexity-sonar-rsn", "name": "perplexity-sonar-rsn", "description": "*Model will be deprecated on December 15, 2025. We recommend switching to Sonar-Rsn-Pro.\nThis model operates on the open-sourced uncensored R1-1776 model from Perplexity with web search capabilities. The Sonar Reasoning Model is a cutting-edge AI answer engine designed to deliver fast, accurate, and reliable responses. Context Length: 128k", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1739996703995 } }, { "id": "perplexity-deep-research", "name": "perplexity-deep-research", "description": "Perplexity Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers information. This enables comprehensive report generation across domains like finance, technology, health, and current events. Context Length: 128k", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1740542141787 } }, { "id": "flux-pro-1.1-ultra", "name": "flux-pro-1.1-ultra", "description": "State-of-the-art image generation with four times the resolution of standard FLUX-1.1-pro. Best-in-class prompt adherence and pixel-perfect image detail. Use \"--aspect\" to select an aspect ratio (e.g --aspect 1:1). Add \"--raw\" (no other arguments needed) for an overall less processed, everyday aesthetic. Valid aspect ratios are 21:9, 16:9, 4:3, 1:1, 3:4, 9:16, & 9:21. Send an image to have this model reimagine/regenerate it via FLUX Redux, and use \"--strength\" (e.g --strength 0.7) to control the impact of the text prompt (1 gives greater influence, 0 means very little).", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 250, "metadata": { "source": "api", "tags": [], "created": 1731696606126 } }, { "id": "mistral-small-3.1", "name": "mistral-small-3.1", "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742338142315 } }, { "id": "claude-opus-3", "name": "claude-opus-3", "description": "Anthropic's Claude Opus 3 can handle complex analysis, longer tasks with multiple steps, and higher-order math and coding tasks. 
Supports 200k tokens of context (approximately 150k English words).", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 13, "currency": "USD" }, "output": { "per_million_tokens": 64, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1709574492024 } }, { "id": "sonic-3.0", "name": "sonic-3.0", "description": "Generates audio based on your prompt using Cartesia's latest Sonic 3.0 text-to-speech model in your voice of choice.\nSupports 10k characters.\n\nYou can select a voice and language in the options menu in the input bar.\n\nThe following voices are supported covering 42 languages (English, Arabic, Bengali, Bulgarian, Chinese, Croatian, Czech, Danish, Dutch, Finnish, French, Georgian, German, Greek, Gujarati, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Kannada, Korean, Malay, Malayalam, Marathi, Norwegian, Polish, Portuguese, Punjabi, Romanian, Russian, Slovak, Spanish, Swedish, Tagalog, Tamil, Telugu, Thai, Turkish, Ukrainian, Vietnamese):\n\n-- English --\nAriana\nKiefer\nTessa\nBrandon\nLinda - Conversational Guide\nRonald - Thinker\nBrooke - Big Sister\nKatie - Friendly Fixer\nJacqueline - Reassuring Agent\nCaroline - Southern Guide\n\n-- Arabic --\nAmira - Dreamy Whisperer\nOmar - High-Energy Presenter\n\n-- Bengali --\nPooja - Everyday Assistant\nRubel - City Guide\n\n-- Bulgarian --\nIvana - Instruction Provider\nGeorgi - Conversationalist\n\n-- Chinese --\nHua - Sunny Support\nYue - Gentle Woman\nTao - Lecturer\nLan - Instructor\n\n-- Croatian --\nPetra - Strict Lecturer\nIvan - Bar Companion\n\n-- Czech --\nJana - Crisp Conversationalist\nPetr - Pastor\n\n-- Danish --\nKatrine - Calm Caregiver\n\n-- Dutch --\nBram - Instructional\nDaan - Business Baritone\nSanne - Clear Companion\nLucas - Storyteller\n\n-- Finnish --\nHelmi - Warm Friend\nMikko - Narration Expert\n\n-- French --\nHelpful French Lady\nFrench Narrator Man\nCalm French Woman\nAntoine - Stern Man\n\n-- Georgian --\nLevan - Support Guide\nTamara - Support Specialist\n\n-- German --\nThomas - Anchor\nViktoria - Phone Conversationalist\nLukas - Professional\nLena - Muse\n\n-- Greek --\nDespina - Motherly Woman\nNikos - Radio Storyteller\n\n-- Gujarati --\nIsha - Learner\nAmit - Sports Student\n\n-- Hebrew --\nNoam - Broadcaster\n\n-- Hindi --\nArushi - Hinglish Speaker\nSunil - Official Announcer\nRiya - College Roommate\nAadhya - Soother\n\n-- Hungarian --\nGabor - Reassuring\nEszter - Customer Companion\n\n-- Indonesian --\nSiti - Ad Narrator\nAndi - Dynamic Presenter\n\n-- Italian --\nLiv - Casual Friend\nAlessandra - Melodic Guide\nFrancesca - Elegant Partner\nGiancarlo - Support Leader\n\n-- Japanese --\nYumiko - Friendly Agent\nEmi - Soft-Spoken Friend\nYuki - Calm Woman\nDaisuke - Businessman\n\n-- Kannada --\nPrakash - Instructor\nDivya - Joyful Narrator\n\n-- Korean --\nJihyun - Anchorwoman\nMimi - Show Stopper\nByungtae - Enforcer\nJiwoo - Service Specialist\n\n-- Malay --\nAisyah - Chat Partner\nFaiz - Family Guide\n\n-- Malayalam --\nLatha - Friendly Host\n\n-- Marathi --\nSuresh - Instruction\nAnika - Enthusiastic Seller\n\n-- Norwegian --\nLars - Casual Conversationalist\n\n-- Polish --\nTomek - Casual Companion\nWojciech - Documentarian\nPiotr - Corporate Lead\nKatarzyna - Melodic Storyteller\n\n-- Portuguese --\nLuana - Public Speaker\nFelipe - Casual Talker\nAna Paula - Marketer\nBeatriz - Support Guide\n\n-- Punjabi --\nGurpreet - Companion\nJaspreet - 
Commercial Woman\n\n-- Romanian --\nAndrada - Steady Speaker\nAndrei - Conversationalist Guy\n\n-- Russian --\nTatiana - Friendly Storyteller\nNatalya - Soothing Guide\nIrina - Poetic\nSergei - Expressive Narrator\n\n-- Slovak --\nKatarina - Friendly Sales\nPeter - Narrator Man\n\n-- Spanish --\nPedro - Formal Speaker\nDaniela - Relaxed Woman\nFran - Confident Young Professional\nIsabel - Teacher\n\n-- Swedish --\nFreja - Nordic Reader\nIngrid - Peaceful Guide\nAnders - Nordic Baritone\nCees - Nordic Narrator\n\n-- Tagalog --\nLuz - Casual Speaker\nAngelo - Calm Narrator\n\n-- Tamil --\nArun - Lively\nLakshmi - Everyday\n\n-- Telugu --\nSindhu - Conversational Partner\nVikram - Folk Narrator\n\n-- Thai --\nSomchai - Star\nSuda - Fortune Teller\n\n-- Turkish --\nEmre - Calming Speaker\nLeyla - Story Companion\nAzra - Service Specialist\nTaylan - Expressive\n\n-- Ukrainian --\nOleh - Professional Guy\n\n-- Vietnamese --\nMinh - Conversational Partner\nXia - Calm Companion", "owned_by": "Cartesia AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761174377362 } }, { "id": "hailuo-music-v1.5", "name": "hailuo-music-v1.5", "description": "Generate music from text prompts using the MiniMax model, which leverages advanced AI techniques to create high-quality, diverse musical compositions. Send the lyrics of the music over as your prompt. \nUse `--style` to set the style of the generated music - for example, rock and roll, hip-hop, etc. \nBoth prompt/lyrics and style must be sent over for best quality.\nThe prompt supports [intro][verse][chorus][bridge][outro] sections.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758018191524 } }, { "id": "elevenlabs-music", "name": "elevenlabs-music", "description": "The ElevenLabs music model is a generative AI system designed to compose original music from text prompts. It allows creators to specify genres, moods, instruments, and structure, producing royalty-free tracks tailored to their needs. The model emphasizes speed, creative flexibility, and high-quality audio output, making it suitable for use in videos, podcasts, games, and other multimedia projects. This bot can produce songs with suggested lyrics based on general descriptions, exact lyrics if specified as such, or instrumental ones, all via prompting.\nUse `--music_length_ms` to set the length of the song in milliseconds (10,000 to 300,000 ms).\nPrompt input cannot exceed 2,000 characters.", "owned_by": "ElevenLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2000, "metadata": { "source": "api", "tags": [], "created": 1756499655464 } }, { "id": "whisper-v3-large-t", "name": "whisper-v3-large-t", "description": "Whisper v3 Large is a state-of-the-art automatic speech recognition and translation model developed by OpenAI, offering 10–20% lower error rates than its predecessor, Whisper large-v2. 
It supports transcription and translation across numerous languages, with improvements in handling diverse audio inputs, including noisy conditions and long-form audio files.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756410173218 } }, { "id": "stable-audio-2.5", "name": "stable-audio-2.5", "description": "Stable Audio 2.5 generates high-quality audio up to 3 minutes long from text prompts, supporting text-to-audio, audio-to-audio transformations, and inpainting with customizable settings like duration, steps, CFG scale, and more. It is ideal for music production, cinematic sound design, and remixing. \n\nNote: Audio-to-audio and inpaint modes require a prompt alongside an uploaded audio file for generation.\n\nParameter controls available:\n1. Basic\n - Default: text-to-audio (no `--mode` needed)\n - If transforming uploaded audio: `--mode audio-to-audio`\n - If replacing specific parts: `--mode audio-inpaint`\n - `--output_format wav` (for high quality, otherwise omit for mp3)\n2. Timing and Randomness \n - `--duration [1-190 seconds]` controls how long generated audio is\n - `--random_seed false --seed [0-4294967294]` disables random seed generation\n3. Advanced\n - `--cfg_scale [1-25]`: Higher = closer to prompt (recommended 7-15)\n - `--steps [4-8]`: Higher = better quality (recommended 6-8)\n4. Transformation control (only for audio-to-audio)\n - `--strength [0-1]`: How much to change/transform (0.3-0.7 typical)\n5. Inpainting control (only for audio-inpaint)\n - `--mask_start_time [seconds]` start time of the uploaded audio to modify\n - `--mask_end_time [seconds]` end time of the uploaded audio to modify", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756869275249 } }, { "id": "stable-audio-2.0", "name": "stable-audio-2.0", "description": "Stable Audio 2.0 generates audio up to 3 minutes long from text prompts, supporting text-to-audio and audio-to-audio transformations with customizable settings like duration, steps, CFG scale, and more. It is ideal for creative professionals seeking detailed and extended outputs from simple prompts.\n\nNote: Audio-to-audio mode requires a prompt alongside an uploaded audio file for generation.\n\nParameter controls available:\n1. Basic\n - Default: text-to-audio (no `--mode` needed)\n - If transforming uploaded audio: `--mode audio-to-audio`\n - `--output_format wav` (for high quality, otherwise omit for mp3)\n2. Timing and Randomness \n - `--duration [1-190 seconds]` controls how long generated audio is\n - `--random_seed false --seed [0-4294967294]` disables random seed generation\n3. Advanced\n - `--cfg_scale [1-25]`: Higher = closer to prompt (recommended 7-15)\n - `--steps [30-100]`: Higher = better quality (recommended 50-80)\n4. Transformation control (only for audio-to-audio)\n - `--strength [0-1]`: How much to change/transform (0.3-0.7 typical)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756880177270 } }, { "id": "hailuo-speech-02", "name": "hailuo-speech-02", "description": "Generate speech from text prompts using the MiniMax Speech-02 model. Include `--hd` at the end of your prompt for higher quality output with a higher price. 
You may set language with `--language`, voice with `--voice`, pitch with `--pitch`, speed with `--speed`, and volume with `--volume`. Please check the UI for allowed values for each parameter.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749503032615 } }, { "id": "elevenlabs-v2.5-turbo", "name": "elevenlabs-v2.5-turbo", "description": "ElevenLabs' leading text-to-speech technology converts your text into natural-sounding speech, using the Turbo v2.5 model. Simply send a text prompt, and the bot will generate audio using your choice of available voices. If you link a URL or a PDF, it will do its best to read it aloud to you. The overall default voice is Jessica, an American-English female.\n\nAdd --voice \"Voice Name\" to the end of a message (e.g. \"Hello world --voice Eric\") to customize the voice used. Add --language and the two-letter ISO-639-1 language code to your message if you notice pronunciation errors; table of ISO-639-1 codes here: https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes (e.g. zh for Chinese, es for Spanish, hi for Hindi)\n\nThe following voices are supported and recommended for each language:\n\nEnglish -- Sarah, George, River, Matilda, Will, Jessica, Brian, Lily, Monika Sogam\nChinese -- James Gao, Martin Li, Will, River\nSpanish -- David Martin, Will, Efrayn, Alejandro, Sara Martin, Regina Martin\nHindi -- Ranga, Niraj, Liam, Raju, Leo, Manu, Vihana Huja, Kanika, River, Monika Sogam, Muskaan, Saanu, Riya, Devi\nArabic -- Bill, Mo Wiseman, Haytham, George, Mona, Sarah, Sana, Laura\nGerman -- Bill, Otto, Leon Stern, Mila, Emilia, Lea, Leonie\nIndonesian -- Jessica, Putra, Mahaputra\nPortuguese -- Will, Muhammad, Onildo, Lily, Jessica, Alice\nVietnamese -- Bill, Liam, Trung Caha, Van Phuc, Ca Dao, Trang, Jessica, Alice, Matilda\nFilipino -- Roger, Brian, Alice, Matilda\nFrench -- Roger, Louis, Emilie\nSwedish -- Will, Chris, Jessica, Charlotte\nTurkish -- Cavit Pancar, Sohbet Adami, Belma, Sultan, Mahidevran\nRomanian -- Eric, Bill, Brian, Charlotte, Lily\nItalian -- Carmelo, Luca, Alice, Lily\nPolish -- Robert, Rob, Eric, Pawel, Lily, Alice\nNorwegian -- Chris, Charlotte\nCzech -- Pawel\nFinnish -- Callum, River\nHungarian -- Brian, Sarah\nJapanese -- Alice\n\nPrompt input cannot exceed 40,000 characters.", "owned_by": "ElevenLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1730153913289 } }, { "id": "sonic-2.0", "name": "sonic-2.0", "description": "Generates audio based on your prompt using Cartesia's latest Sonic 2.0 text-to-speech model in your voice of choice (see below)\n\nAdd --voice [Voice Name] to the end of a message to customize the voice used or to handle different language inputs (e.g. 你好 --voice Chinese Commercial Woman). All of Cartesia's voices are supported on Poe. 
\n\nThe following voices are supported covering 15 languages (English, French, German, Spanish, Portuguese, Chinese, Japanese, Hindi, Italian, Korean, Dutch, Polish, Russian, Swedish, Turkish):\n\n\nHere's the alphabetical list of all the top voice names:\n\n\"1920's Radioman\"\nAadhya\nAdele\nAlabama Man\nAlina\nAmerican Voiceover Man\nAnanya\nAnna\nAnnouncer Man\nApoorva\nASMR Lady\nAustralian Customer Support Man\nAustralian Man\nAustralian Narrator Lady\nAustralian Salesman\nAustralian Woman\nBarbershop Man\nBrenda\nBritish Customer Support Lady\nBritish Lady\nBritish Reading Lady\nBrooke\nCalifornia Girl\nCalm French Woman\nCalm Lady\nCamille\nCarson\nCasper\nCathy\nChongz\nClassy British Man\nCommercial Lady\nCommercial Man\nConfident British Man\nConnie\nCorinne\nCustomer Support Lady\nCustomer Support Man\nDallas\nDave\nDavid\nDevansh\nElena\nEllen\nEthan\nFemale Nurse\nFlorence\nFrancesca\nFrench Conversational Lady\nFrench Narrator Lady\nFrench Narrator Man\nFriendly Australian Man\nFriendly French Man\nFriendly Reading Man\nFriendly Sidekick\nGerman Conversational Woman\nGerman Conversation Man\nGerman Reporter Man\nGerman Woman\nGrace\nGriffin\nHappy Carson\nHelpful French Lady\nHelpful Woman\nHindi Calm Man\nHinglish Speaking Woman\nIndian Lady\nIndian Man\nIsabel\nIshan\nJacqueline\nJanvi\nJapanese Male Conversational\nJoan of Ark\nJohn\nJordan\nKatie\nKeith\nKenneth\nKentucky Man\nKorean Support Woman\nLaidback Woman\nLena\nLily Whisper\nLittle Gaming Girl\nLittle Narrator Girl\nLiv\nLukas\nLuke\nMadame Mischief\nMadison\nMaria\nMateo\nMexican Man\nMexican Woman\nMia\nMiddle Eastern Woman\nMidwestern Man\nMidwestern Woman\nMovieman\nNathan\nNewslady\nNewsman\nNew York Man\nNico\nNonfiction Man\nOlivia\nOrion\nPeninsular Spanish Narrator Lady\nPleasant Brazilian Lady\nPleasant Man\nPolite Man\nPrincess\nProfessional Woman\nRebecca\nReflective Woman\nRonald\nRussian Storyteller Man\nSalesman\nSamantha Angry\nSamantha Happy\nSarah\nSarah Curious\nSavannah\nSilas\nSophie\nSouthern Man\nSouthern Woman\nSpanish Narrator Woman\nSpanish Reporter Woman\nSpanish-speaking Reporter Man\nSportsman\nStacy\nStern French Man\nSteve\nStoryteller Lady\nSweet Lady\nTatiana\nTaylor\nTeacher Lady\nThe Merchant\nTutorial Man\nWise Guide Man\nWise Lady\nWise Man\nWizardman\nYogaman\nYoung Shy Japanese Woman\nZia", "owned_by": "Cartesia AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1731968187492 } }, { "id": "gemini-2.5-flash-tts", "name": "gemini-2.5-flash-tts", "description": "Gemini‑2.5‑Flash‑TTS is Google’s low‐latency text‑to‑speech model that converts text input into audio output, supporting both single‑ and multi‑speaker voices with controllable style, accent, and expressive tone — ideal for applications like podcasts, audiobooks, and conversational voice systems.\nThis bot does not accept attachments.\n\nParameter controls available:\n1. 
Voice & Style Configuration\n - Basic Settings\n - `--mode single` (default) for single speaker or `--mode multi` for conversation\n - `--language [code]` (e.g., en-US, fr-FR, ja-JP; default: en-US)\n - `--output_format [MP3|WAV|OGG]` (default: MP3)\n - Single speaker: `--voice [voice_name]` (default: Charon)\n - Multi-speaker: `--voice [voice_name]` (primary speaker, default: Charon), `--voice2 [voice_name]` (secondary speaker, default: Kore)\n - Multi-speaker: `--speaker1_name [name]` (default: Speaker1), `--speaker2_name [name]` (default: Speaker2)\n - Style Instructions\n - `--style_prompt [text]` for tone/emotion (e.g., \"Cheerful tone\", \"Slow British accent\")\n2. Limitations\n - Text and style prompt limited to 4000 bytes each\n - Multi-speaker requires `SpeakerName: text` format\n\nAvailable voices: Zephyr (Bright), Puck (Upbeat), Charon (Informative), Kore (Firm), Fenrir (Excitable), Leda (Youthful), Orus (Firm), Aoede (Breezy), Callirrhoe (Easy-going), Autonoe (Bright), Enceladus (Breathy), Iapetus (Clear), Umbriel (Easy-going), Algieba (Smooth), Despina (Smooth), Erinome (Clear), Algenib (Gravelly), Rasalgethi (Informative), Laomedeia (Upbeat), Achernar (Soft), Alnilam (Firm), Schedar (Even), Gacrux (Mature), Pulcherrima (Forward), Achird (Friendly), Zubenelgenubi (Casual), Vindemiatrix (Gentle), Sadachbia (Lively), Sadaltager (Knowledgeable), Sulafat (Warm)\n\nAvailable languages: English (US, en-US), Arabic (Egyptian, ar-EG), Bengali (Bangladesh, bn-BD), Dutch (Netherlands, nl-NL), French (France, fr-FR), German (Germany, de-DE), Hindi (India, hi-IN), Indonesian (Indonesia, id-ID), Italian (Italy, it-IT), Japanese (Japan, ja-JP), Korean (Korea, ko-KR), Marathi (India, mr-IN), Polish (Poland, pl-PL), Portuguese (Brazil, pt-BR), Romanian (Romania, ro-RO), Russian (Russia, ru-RU), Spanish (US, es-US), Tamil (India, ta-IN), Telugu (India, te-IN), Thai (Thailand, th-TH), Turkish (Turkey, tr-TR), Ukrainian (Ukraine, uk-UA), Vietnamese (Vietnam, vi-VN)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758667568690 } }, { "id": "gemini-2.5-pro-tts", "name": "gemini-2.5-pro-tts", "description": "Gemini‑2.5‑Pro‑TTS is Google’s highest‑quality text‑to‑speech model preview, designed for complex workflows like podcasts, audiobooks, and customer support; it delivers expressive, accent‑ and style‑controllable single‑ or multi‑speaker speech, supporting over 23 languages, and built for state‑of‑the‑art output with the most powerful model architecture.\nThis bot does not accept attachments.\n\nParameter controls available:\n1. Voice & Style Configuration\n- Basic Settings\n- `--mode single` (default) for single speaker or `--mode multi` for conversation\n- `--language [code]` (e.g., en-US, fr-FR, ja-JP; default: en-US)\n- `--output_format [MP3|WAV|OGG]` (default: MP3)\n- Single speaker: `--voice [voice_name]` (default: Charon)\n- Multi-speaker: `--voice [voice_name]` (primary speaker, default: Charon), `--voice2 [voice_name]` (secondary speaker, default: Kore)\n- Multi-speaker: `--speaker1_name [name]` (default: Speaker1), `--speaker2_name [name]` (default: Speaker2)\n- Style Instructions\n- `--style_prompt [text]` for tone/emotion (e.g., \"Cheerful tone\", \"Slow British accent\")\n2. 
Limitations\n- Text and style prompt limited to 4000 bytes each\n- Multi-speaker requires `SpeakerName: text` format\n\nAvailable voices: Zephyr (Bright), Puck (Upbeat), Charon (Informative), Kore (Firm), Fenrir (Excitable), Leda (Youthful), Orus (Firm), Aoede (Breezy), Callirrhoe (Easy-going), Autonoe (Bright), Enceladus (Breathy), Iapetus (Clear), Umbriel (Easy-going), Algieba (Smooth), Despina (Smooth), Erinome (Clear), Algenib (Gravelly), Rasalgethi (Informative), Laomedeia (Upbeat), Achernar (Soft), Alnilam (Firm), Schedar (Even), Gacrux (Mature), Pulcherrima (Forward), Achird (Friendly), Zubenelgenubi (Casual), Vindemiatrix (Gentle), Sadachbia (Lively), Sadaltager (Knowledgeable), Sulafat (Warm)\n\nAvailable languages: English (US, en-US), Arabic (Egyptian, ar-EG), Bengali (Bangladesh, bn-BD), Dutch (Netherlands, nl-NL), French (France, fr-FR), German (Germany, de-DE), Hindi (India, hi-IN), Indonesian (Indonesia, id-ID), Italian (Italy, it-IT), Japanese (Japan, ja-JP), Korean (Korea, ko-KR), Marathi (India, mr-IN), Polish (Poland, pl-PL), Portuguese (Brazil, pt-BR), Romanian (Romania, ro-RO), Russian (Russia, ru-RU), Spanish (US, es-US), Tamil (India, ta-IN), Telugu (India, te-IN), Thai (Thailand, th-TH), Turkish (Turkey, tr-TR), Ukrainian (Ukraine, uk-UA), Vietnamese (Vietnam, vi-VN)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758861500162 } }, { "id": "orpheus-tts", "name": "orpheus-tts", "description": "Orpheus TTS is a state-of-the-art, Llama-based Speech-LLM designed for high-quality, empathetic text-to-speech generation. Send a text prompt to voice it. Use --voice to choose from one of the available voices (`tara`, `leah`, `jess`, `leo`, `dan`, `mia`, `zac`, `zoe`). Officially supported sound effects are: <laugh>, <chuckle>, <sigh>, <cough>, <sniffle>, <groan>, <yawn>, and <gasp>.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1743698312235 } }, { "id": "deepgram-nova-3", "name": "deepgram-nova-3", "description": "Transcribe audio files using Speech-to-Text technology with the Deepgram Nova-3 model, featuring multi-language support and advanced customizable settings.\n\n[1] Basic Features: \nUse `--generate_pdf true` to generate a PDF file of the transcription, \nUse `--diarize true` to identify different speakers in the audio. This will automatically enable utterances.\nUse `--smart_format false` to disable automatic text formatting for improved readability, including punctuation and paragraphs. This feature is enabled by default.\n\n[2] Advanced Features:\nUse `--dictation true` to convert spoken commands for punctuation into their respective marks (e.g., 'period' becomes '.'). This will automatically enable punctuation.\nUse `--measurements true` to format spoken measurement units into abbreviations\nUse `--profanity_filter true` to replace profanity with asterisks\nUse `--redact_pci true` to redact payment card information\nUse `--redact_pii true` to redact personally identifiable information\nUse `--utterances true` to segment speech into meaningful semantic units\nUse `--paragraphs false` to disable the paragraphs feature. This feature splits audio into paragraphs to improve transcript readability. This will automatically enable punctuation. This is enabled by default.\nUse `--punctuate false` to disable the punctuate feature. This feature adds punctuation and capitalization to your transcript. 
This is enabled by default.\nUse `--numerals false` to disable the numerals feature. This feature converts numbers from written format to numerical format.\n\n[3] Languages Supported:\nAuto-detect (Default)\nEnglish\nSpanish\nFrench\nGerman\nItalian\nPortuguese\nJapanese\nChinese\nHindi\nRussian\nDutch\n\n[4] Key Terms: Use `--keyterm` to enter important terms to improve recognition accuracy, separated by commas. English only, limited to 500 tokens total.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753875390474 } }, { "id": "playai-tts", "name": "playai-tts", "description": "Generates audio based on your prompt using PlayHT's text-to-speech model, in the voice of your choice. Use --voice [voice_name] to pass in the voice of your choice, choosing one from below. Voice defaults to `Jennifer_(English_(US)/American)`. \n\nJennifer_(English_(US)/American)\nDexter_(English_(US)/American)\nAva_(English_(AU)/Australian)\nTilly_(English_(AU)/Australian)\nCharlotte_(Advertising)_(English_(CA)/Canadian)\nCharlotte_(Meditation)_(English_(CA)/Canadian)\nCecil_(English_(GB)/British)\nSterling_(English_(GB)/British)\nCillian_(English_(IE)/Irish)\nMadison_(English_(IE)/Irish)\nAda_(English_(ZA)/South_African)\nFurio_(English_(IT)/Italian)\nAlessandro_(English_(IT)/Italian)\nCarmen_(English_(MX)/Mexican)\nSumita_(English_(IN)/Indian)\nNavya_(English_(IN)/Indian)\nBaptiste_(English_(FR)/French)\nLumi_(English_(FI)/Finnish)\nRonel_Conversational_(Afrikaans/South_African)\nRonel_Narrative_(Afrikaans/South_African)\nAbdo_Conversational_(Arabic/Arabic)\nAbdo_Narrative_(Arabic/Arabic)\nMousmi_Conversational_(Bengali/Bengali)\nMousmi_Narrative_(Bengali/Bengali)\nCaroline_Conversational_(Portuguese_(BR)/Brazilian)\nCaroline_Narrative_(Portuguese_(BR)/Brazilian)\nAnge_Conversational_(French/French)\nAnge_Narrative_(French/French)\nAnke_Conversational_(German/German)\nAnke_Narrative_(German/German)\nBora_Conversational_(Greek/Greek)\nBora_Narrative_(Greek/Greek)\nAnuj_Conversational_(Hindi/Indian)\nAnuj_Narrative_(Hindi/Indian)\nAlessandro_Conversational_(Italian/Italian)\nAlessandro_Narrative_(Italian/Italian)\nKiriko_Conversational_(Japanese/Japanese)\nKiriko_Narrative_(Japanese/Japanese)\nDohee_Conversational_(Korean/Korean)\nDohee_Narrative_(Korean/Korean)\nIgnatius_Conversational_(Malay/Malay)\nIgnatius_Narrative_(Malay/Malay)\nAdam_Conversational_(Polish/Polish)\nAdam_Narrative_(Polish/Polish)\nAndrei_Conversational_(Russian/Russian)\nAndrei_Narrative_(Russian/Russian)\nAleksa_Conversational_(Serbian/Serbian)\nAleksa_Narrative_(Serbian/Serbian)\nCarmen_Conversational_(Spanish/Spanish)\nPatricia_Conversational_(Spanish/Spanish)\nAiken_Conversational_(Tagalog/Filipino)\nAiken_Narrative_(Tagalog/Filipino)\nKatbundit_Conversational_(Thai/Thai)\nKatbundit_Narrative_(Thai/Thai)\nAli_Conversational_(Turkish/Turkish)\nAli_Narrative_(Turkish/Turkish)\nSahil_Conversational_(Urdu/Pakistani)\nSahil_Narrative_(Urdu/Pakistani)\nMary_Conversational_(Hebrew/Israeli)\nMary_Narrative_(Hebrew/Israeli)", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737458808496 } }, { "id": "unreal-speech-tts", "name": "unreal-speech-tts", "description": "Convert chats, URLs, and documents into natural speech. 8 Languages: English, Japanese, Chinese, Spanish, French, Hindi, Italian, Portuguese. Use `--voice`. 
Defaults to `--voice Sierra`. Full list below:\n\nAmerican English\n- Male: Noah, Jasper, Caleb, Ronan, Ethan, Daniel, Zane, Rowan\n- Female: Autumn, Melody, Hannah, Emily, Ivy, Kaitlyn, Luna, Willow, Lauren, Sierra\n\nBritish English\n- Male: Benjamin, Arthur, Edward, Oliver\n- Female: Eleanor, Chloe, Amelia, Charlotte\n\nJapanese\n- Male: Haruto\n- Female: Sakura, Hana, Yuki, Rina\n\nChinese\n- Male: Wei, Jian, Hao, Sheng\n- Female: Mei, Lian, Ting, Jing\n\nSpanish\n- Male: Mateo, Javier\n- Female: Lucía\n\nFrench\n- Female: Élodie\n\nHindi\n- Male: Arjun, Rohan\n- Female: Ananya, Priya\n\nItalian\n- Male: Luca\n- Female: Giulia\n\nPortuguese\n- Male: Thiago, Rafael\n- Female: Camila", "owned_by": "Unreal Speech", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1741061137514 } }, { "id": "imagen-4-ultra", "name": "imagen-4-ultra", "description": "DeepMind's May 2025 text-to-image model with exceptional prompt adherence, capable of generating images with great detail, rich lighting, and few distracting artifacts. To adjust the aspect ratio of your image add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4). Non-English input will be translated first. Serves the `imagen-4.0-ultra-generate-exp-05-20` model from Google Vertex, and has a maximum input of 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1748061401435 } }, { "id": "imagen-4-fast", "name": "imagen-4-fast", "description": "DeepMind's June 2025 text-to-image model with exceptional prompt adherence, capable of generating images with great detail, rich lighting, and few distracting artifacts. To adjust the aspect ratio of your image add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4). Non-English input will be translated first. Serves the `imagen-4.0-fast-generate-preview-06-06` model from Google Vertex, and has a maximum input of 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1750875079224 } }, { "id": "imagen-4", "name": "imagen-4", "description": "DeepMind's May 2025 text-to-image model with exceptional prompt adherence, capable of generating images with great detail, rich lighting, and few distracting artifacts. To adjust the aspect ratio of your image add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4). Non-English input will be translated first. Serves the `imagen-4.0-ultra-generate-05-20` model from Google Vertex, and has a maximum input of 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1747888192720 } }, { "id": "phoenix-1.0", "name": "phoenix-1.0", "description": "High-fidelity image generation with strong prompt adherence, especially for long and detailed instructions. Phoenix is capable of rendering coherent text in a wide variety of contexts. Prompt enhance is on by default to show the full power of a long, detailed prompt, but it can be turned off for full control. 
Uses the Phoenix 1.0 Fast model for performant, high-quality generations.\n\nParameters:\n- Aspect Ratio (1:1, 3:2, 2:3, 9:16, 16:9)\n- Prompt Enhance (enable prompt enhancement for better image generation)\n- Style (Please see parameter control to identify available styles)\n\nImage generation prompts can be a maximum of 1500 characters.", "owned_by": "Leonardo Ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748565176146 } }, { "id": "dreamina-3.1", "name": "dreamina-3.1", "description": "ByteDance's Dreamina 3.1 Text-to-Image showcases superior picture effects, with significant improvements in picture aesthetics, precise and diverse styles, and rich details. This model excels with large prompts; please use detailed prompts if you face Content Checker issues.\nThe model does not accept attachments. \nUse \"--aspect\" to select an aspect ratio (e.g --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, & 9:16.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754503266312 } }, { "id": "qwen-image-20b", "name": "qwen-image-20b", "description": "Qwen-Image (20B) is an image generation foundation model in the Qwen series that achieves significant advances in complex text rendering. Use `--aspect` to set the aspect ratio. Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. Use `--negative_prompt` to set the negative prompt.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754502513609 } }, { "id": "hunyuan-image-2.1", "name": "hunyuan-image-2.1", "description": "Hunyuan Image 2.1 is a high quality, highly efficient text-to-image model. Send a prompt to generate an image. \nUse `--aspect` (one of `16:9`, `4:3`, `1:1`, `3:4`, `9:16`) to set the aspect ratio of the generated image.\nUse `--negative_prompt` (examples: blur, low resolution, poor quality) to set a negative prompt for the generated image.\nThis bot does not accept attachments.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757535106819 } }, { "id": "flux-kontext-pro", "name": "flux-kontext-pro", "description": "The FLUX.1 Kontext [pro] model delivers state-of-the-art image generation results with unprecedented prompt following, photorealistic rendering, flawless typography, and image editing capabilities. Send a prompt to generate an image, or send an image along with an instruction to edit the image. Use `--aspect` to set the aspect ratio for text-to-image generation. Available aspect ratios: 21:9, 16:9, 4:3, 1:1, 3:4, 9:16, & 9:21.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748527242279 } }, { "id": "flux-krea", "name": "flux-krea", "description": "FLUX-Krea is a version of FLUX Dev tuned for superior aesthetics. Use \"--aspect\" to select an aspect ratio (e.g --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. 
Send an image to have this model reimagine/regenerate it via FLUX Krea Redux.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753991501514 } }, { "id": "imagen-3", "name": "imagen-3", "description": "Google DeepMind's highest quality text-to-image model, capable of generating images with great detail, rich lighting, and few distracting artifacts. To adjust the aspect ratio of your image add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4). For simpler prompts, faster results, & lower cost, use @Imagen3-Fast. Non-English input will be translated first. Image prompt cannot exceed 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1729023417016 } }, { "id": "wan-animate", "name": "wan-animate", "description": "Wan Animate takes in an image and a video to generate another video where a character in the image replaces a character in the video (default), or the video character's motion is used to animate the character in the image. Pass --animate for the second functionality.\nThe bot supports only four file types: JPEG, PNG, WebP, and MP4", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758552514026 } }, { "id": "imagen-3-fast", "name": "imagen-3-fast", "description": "Google DeepMind's highest quality text-to-image model, capable of generating images with great detail, rich lighting, and few distracting artifacts — optimized for short, simple prompts. To adjust the aspect ratio of your image add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4). For more complex prompts, use @Imagen3. Non-English input will be translated first. Image prompt cannot exceed 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1729127959259 } }, { "id": "seedream-3.0", "name": "seedream-3.0", "description": "Seedream 3.0 by ByteDance is a bilingual (Chinese and English) text-to-image model that excels at text-to-image generation.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750007407012 } }, { "id": "seedance-1.0-pro", "name": "seedance-1.0-pro", "description": "Seedance is a video generation model with text-to-video and image-to-video capabilities. It achieves breakthroughs in semantic understanding and prompt following. Use `--aspect` to set the aspect ratio (available values: `21:9`, `16:9`, `4:3`, `1:1`, `3:4`, `9:16`). Use `--resolution` (one of `480p`, `720p`, `1080p`) to set the video resolution. `--duration` (3 to 12) sets the video duration.\nNumber of video tokens calculated for pricing is approximately `height * width * fps * duration / 1024`.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750447821693 } }, { "id": "seedance-1.0-lite", "name": "seedance-1.0-lite", "description": "Seedance is a video generation model with text-to-video and image-to-video capabilities. It achieves breakthroughs in semantic understanding and prompt following. 
\n\nOptional parameters:\nUse `--aspect` to set the aspect ratio (available values: `21:9`, `16:9`, `4:3`, `1:1`, `3:4` and `9:16`). \nUse `--resolution` (one of `480p`, `720p` and `1080p`) to set the video resolution. \nUse `--duration` (3 to 12) to set the video duration. Number of video tokens calculated for pricing is approximately `height * width * fps * duration / 1024`.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750007728801 } }, { "id": "ideogram-v3", "name": "ideogram-v3", "description": "Generate high-quality images, posters, and logos with Ideogram V3. Features exceptional typography handling and realistic outputs optimized for commercial and creative use. Use `--aspect` to set the aspect ratio (Valid aspect ratios are 5:4, 4:3, 4:5, 1:1, 1:2, 1:3, 3:4, 3:1, 3:2, 2:1, 2:3, 16:9, 16:10, 10:16, 9:16), and use `--style` to specify a style (one of `AUTO`, `GENERAL`, `REALISTIC`, and `DESIGN`, default: `AUTO`). Send one image with a prompt for image remixing/restyling. Send two images (one an image and the other a black-and-white mask image denoting an area) for image editing.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1746189583927 } }, { "id": "ideogram-v2", "name": "ideogram-v2", "description": "Latest image model from Ideogram, with industry leading capabilities in generating realistic images, graphic design, typography, and more. Allows users to specify the aspect ratio of the image using the \"--aspect\" parameter at the end of the prompt (e.g. \"Tall trees, daylight --aspect 9:16\"). Valid aspect ratios are 10:16, 16:10, 9:16, 16:9, 3:2, 2:3, 4:3, 3:4, 1:1. \"--style\" parameter can be defined to specify the style of image generated (GENERAL, REALISTIC, DESIGN, RENDER_3D, ANIME). Powered by Ideogram.", "owned_by": "IdeogramAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 150, "metadata": { "source": "api", "tags": [], "created": 1724273571743 } }, { "id": "flux-dev-di", "name": "flux-dev-di", "description": "High quality image generator using FLUX dev model. Top of the line prompt following, visual quality and output diversity. This model is text-to-image generation only and does not accept attachments. To further customize your generation, the following parameters are available:\n\nTo set width, use \"--width\". Valid pixel options from 128 up to 1920. Default value: 1024\nTo set height, use \"--height\". Valid pixel options from 128 up to 1920. Default value: 1024\nTo set seed, use \"--seed\" for reproducible results. Options from 1 up to 2**32. Default value: random\nTo set inference, use \"--num_inference_steps\". Options from 1 up to 50. Default: 25", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750507284607 } }, { "id": "flux-schnell-di", "name": "flux-schnell-di", "description": "This is the fastest version of FLUX, featuring highly optimized abstract models that excel at creative and unconventional renders. To further customize your generation, the following parameters are available:\n\nTo set width, use \"--width\". Valid pixel options from 128 up to 1920. Default value: 1024\nTo set height, use \"--height\". Valid pixel options from 128 up to 1920. 
Default value: 1024\nTo set the seed, use \"--seed\" for reproducible results. Options from 1 up to 2**32. Default value: random\nTo set the number of inference steps, use \"--num_inference_steps\". Options from 1 up to 50. Default: 1", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1750333477944 } }, { "id": "flux-pro-1.1", "name": "flux-pro-1.1", "description": "State-of-the-art image generation with top-of-the-line prompt following, visual quality, image detail and output diversity. This is the most powerful version of FLUX 1.1; use \"--aspect\" to select an aspect ratio (e.g. --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. Send an image to have this model reimagine/regenerate it via FLUX Redux.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1727968438767 } }, { "id": "luma-photon-flash", "name": "luma-photon-flash", "description": "Luma Photon delivers industry-specific visual excellence, crafting images that align perfectly with professional standards - not just generic AI art. From marketing to creative design, each generation is purposefully tailored to your industry's unique requirements. Add --aspect to the end of your prompts to change the aspect ratio of your generations (1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21 are supported). Prompt input cannot exceed 5,000 characters.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733181412355 } }, { "id": "hidream-i1-full", "name": "hidream-i1-full", "description": "Hidream-I1 is a state-of-the-art text-to-image model by Hidream. Use `--aspect` to set the aspect ratio. Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. Use `--negative_prompt` to set the negative prompt. Hosted by fal.ai.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747144375790 } }, { "id": "retro-diffusion-core", "name": "retro-diffusion-core", "description": "Generate true game-ready pixel art in seconds at any resolution between 16x16 and 512x512 across various styles. Create 48x48 walking animations of sprites using the \"animation_four_angle_walking\" style! First 50 basic image requests worth of points free! Check out more settings below 👇\n\n\nExample message: \"A cute corgi wearing sunglasses and a party hat --ar 128:128 --style rd_fast__portrait\"\n\nSettings:\n--ar (Image size in pixels, larger images cost more. Or an aspect ratio like 16:9)\n--style (The name of the style you want to use. 
Available styles: rd_fast__anime, rd_fast__retro, rd_fast__simple, rd_fast__detailed, rd_fast__game_asset, rd_fast__portrait, rd_fast__texture, rd_fast__ui, rd_fast__item_sheet, rd_fast__mc_texture, rd_fast__mc_item, rd_fast__character_turnaround, rd_fast__1_bit, animation__four_angle_walking, rd_plus__default, rd_plus__retro, rd_plus__watercolor, rd_plus__textured, rd_plus__cartoon, rd_plus__ui_element, rd_plus__item_sheet, rd_plus__character_turnaround, rd_plus__isometric, rd_plus__isometric_asset, rd_plus__topdown_map, rd_plus__top_down_asset)\n--seed (Random number, keep the same for consistent generations)\n--tile (Creates seamless edges on applicable images)\n--tilex (Seamless horizontally only)\n--tiley (Seamless vertically only)\n--native (Returns pixel art at native resolution, without upscaling)\n--removebg (Automatically removes the background)\n--iw (Controls how strong the image generation is. 0.0 for small changes, 1.0 for big changes)\n\nAdditional notes: All styles have a size range of 48x48 -> 512x512, except for the \"mc\" styles, which have a size range of 16x16 -> 128x128, and the \"animation_four_angle_walking\" style, which will only create 48x48 animations.", "owned_by": "Retro Diffusion", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742484693553 } }, { "id": "stablediffusion3.5-l", "name": "stablediffusion3.5-l", "description": "Stability.ai's StableDiffusion3.5 Large, hosted by @fal, is the Stable Diffusion family's most powerful image generation model in both image quality and prompt adherence. Use \"--aspect\" to select an aspect ratio (e.g. --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729613306476 } }, { "id": "flux-schnell", "name": "flux-schnell", "description": "Turbo speed image generation with strengths in prompt following, visual quality, image detail and output diversity. This is the fastest version of FLUX.1. Use \"--aspect\" to select an aspect ratio (e.g. --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. Send an image to have this model reimagine/regenerate it via FLUX Redux.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1722523149211 } }, { "id": "veo-3.1", "name": "veo-3.1", "description": "Google’s Veo 3.1 is an updated version of the Veo family of models that features richer native audio, from natural conversations to synchronized sound effects, and offers greater narrative control with an improved understanding of cinematic styles. Enhanced image-to-video capabilities ensure better prompt adherence while delivering superior audio and visual quality and maintaining character consistency across multiple scenes.\n\nOptional parameters:\n`--aspect` to set the aspect ratio (either `16:9` or `9:16`), which defaults to `16:9`\na negative prompt can be set by adding `--no` before elements to avoid, e.g. 
`--no blurry`, `--no cloudy`\n`--duration` to set the duration (one of `4s`, `6s`, or `8s`), which defaults to `8s`\n`--seed` to set the seed (a number)\n\nFor first & last frame video generation and reference support, please use www.poe.com/Veo-v3.1", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1760537766690 } }, { "id": "veo-3.1-fast", "name": "veo-3.1-fast", "description": "Google’s Veo 3.1 Fast is an updated version of the Veo family of models that's optimized for speed and cost, but still features richer native audio, from natural conversations to synchronized sound effects, and offers greater narrative control with an improved understanding of cinematic styles. Enhanced image-to-video capabilities ensure better prompt adherence while delivering superior audio and visual quality and maintaining character consistency across multiple scenes.\n\nOptional parameters:\n`--aspect` to set the aspect ratio (either `16:9` or `9:16`), which defaults to `16:9`\na negative prompt can be set by adding `--no` before elements to avoid, e.g. `--no blurry`, `--no cloudy`\n`--duration` to set the duration (one of `4s`, `6s`, or `8s`), which defaults to `8s`\n`--seed` to set the seed (a number)\n\nFor first & last frame video generation support, please use www.poe.com/Veo-v3.1-Fast", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1760540647924 } }, { "id": "kling-2.5-turbo-std", "name": "kling-2.5-turbo-std", "description": "Generate high-quality videos from images using Kling 2.5 Turbo Standard. \n\nOptional parameters:\nUse `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). \nUse `--duration` to set either a 5- or 10-second video. Note: only image-to-video is supported; the aspect ratio is inferred automatically from the image and cannot be set.\n\nSupported image file formats: jpeg, png, webp", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761145308745 } }, { "id": "wan-2.5", "name": "wan-2.5", "description": "Wan-2.5 Video Generation bot. Has text-to-video and image-to-video capabilities. Optionally, send an audio file (mp3) to guide the video generation. \nOptional parameters:\nControl the output's resolution with `--resolution` (480p, 720p, or 1080p); defaults to 720p. Pricing varies by resolution.\nSet the aspect ratio with `--aspect` (16:9, 1:1, 9:16); defaults to 16:9.\nSet the duration with `--duration` (5s or 10s); defaults to 5s.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758919750787 } }, { "id": "seedream-4.0", "name": "seedream-4.0", "description": "Seedream 4.0 is ByteDance's latest and best text-to-image model, capable of impressive high-fidelity image generation with great text-rendering ability. Seedream 4.0 can also take in multiple images as references and combine or edit them to return an output. 
Pass `--aspect` to set the aspect ratio for the model (one of `16:9`, `4:3`, `1:1`, `3:4`, `9:16`).", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757430793599 } }, { "id": "kling-2.5-turbo-pro", "name": "kling-2.5-turbo-pro", "description": "Generate high-quality videos from text and images using Kling 2.5 Turbo Pro. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Use `--aspect` to set the aspect ratio (one of `16:9`, `9:16`, and `1:1`; text-to-video only). Use `--duration` to set either a 5- or 10-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758612711916 } }, { "id": "kling-2.1-master", "name": "kling-2.1-master", "description": "Kling 2.1 Master: The premium endpoint for Kling 2.1, designed for top-tier image-to-video generation with unparalleled motion fluidity, cinematic visuals, and exceptional prompt precision. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Use `--aspect` to set the aspect ratio (one of `16:9`, `9:16`, and `1:1`). Use `--duration` to set either a 5- or 10-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748544153317 } }, { "id": "hailuo-02", "name": "hailuo-02", "description": "Hailuo-02, MiniMax's latest video generation model. Generates 6-second, 768p videos: submit a text prompt, or an image with a prompt describing the desired video behavior, and it will create it. Generation typically takes ~5 minutes. Strong motion effects and ultra-clear quality.", "owned_by": "Minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750150747414 } }, { "id": "hailuo-02-standard", "name": "hailuo-02-standard", "description": "MiniMax Hailuo-02 Video Generation model: Advanced image-to-video generation model with 768p resolution. Send a prompt with an image for image-to-video, and just a prompt for text-to-video generation. Use `--duration` to set the video duration (6 or 10 seconds).", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750266147410 } }, { "id": "hailuo-02-pro", "name": "hailuo-02-pro", "description": "MiniMax Hailuo-02 Pro Video Generation model: Advanced image-to-video generation model with 1080p resolution. Send a prompt with an image for image-to-video, and just a prompt for text-to-video generation. Generates a 5-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753281868828 } }, { "id": "deepseek-r1-turbo-di", "name": "deepseek-r1-turbo-di", "description": "Top open-source reasoning LLM rivaling OpenAI's o1 model; delivers top-tier performance across math, code, and reasoning tasks at a fraction of the cost. The Turbo variant is quantized to achieve higher speeds. 
All data you provide this bot will not be used in training, and is sent only to DeepInfra, a US-based company.\n\nSupports 32k tokens of input context and 8k tokens of output context. Quantization: FP4 (turbo).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "metadata": { "source": "api", "tags": [], "created": 1741250889407 } }, { "id": "hailuo-director-01", "name": "hailuo-director-01", "description": "Generates video clips that more accurately follow natural-language descriptions, using camera movement instructions for shot control. Both text-to-video and image-to-video are supported. \nCamera movement instructions can be added using square brackets (e.g. [Pan left] or [Zoom in]). \nYou can use up to 3 combined movements per prompt. Duration is fixed to 5 seconds. \nSupported movements: Truck left/right, Pan left/right, Push in/Pull out, Pedestal up/down, Tilt up/down, Zoom in/out, Shake, Tracking shot, Static shot. For example: [Truck left, Pan right, Zoom in]. \nFor a more detailed guide, refer to https://sixth-switch-2ac.notion.site/T2V-01-Director-Model-Tutorial-with-camera-movement-1886c20a98eb80f395b8e05291ad8645", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749502785341 } }, { "id": "pixverse-v5", "name": "pixverse-v5", "description": "Pixverse v5 offers advanced creative tools with three main features: Text-to-Video, which transforms written prompts into cinematic, high-detail video clips with fluid motion and accurate visual interpretation; Image-to-Video, which animates static images into dynamic short videos with lifelike motion and smooth transitions; and Transition, which generates seamless morphs between frames or scenes to create unified, professional-quality visual flow.\n\nParameter Controls and Usage:\n1. Video Generation (Main Control Section)\n- `--resolution [360p|540p|720p|1080p]`\n - Description: Video resolution.\n - Default: 720p\n- `--duration [5|8]`\n - Description: Video length in seconds.\n - Default: 5\n- `--aspect_ratio [16:9|4:3|1:1|3:4|9:16]`\n - Description: Video aspect ratio.\n - Default: 16:9\n- `--style [none|anime|3d_animation|clay|comic|cyberpunk]`\n - Description: Video style (optional).\n - Default: none\n- `--negative_prompt \"[text]\"`\n - Description: Elements to avoid (optional).\n - Default: \"\" (empty)\n- `--seed [integer]`\n - Description: Optional seed for reproducibility (e.g., 12345).\n - Default: \"\" (empty/random)\n\n2. Generation Modes (Determined by attachments)\n- Text-to-Video: Provide a prompt with 0 image attachments.\n- Image-to-Video: Provide 1 image attachment.\n- Transition: Provide 2 image attachments (first is start frame, second is end frame).\n\n3. Limitations\n- The combination of `--resolution 1080p` and `--duration 8` is not supported.\n- Only 0, 1, or 2 image attachments are supported.\n- Attachments must be images (PNG/JPEG/WEBP/TIFF/BMP/HEIC/GIF).", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760645525570 } }, { "id": "pixverse-v4.5", "name": "pixverse-v4.5", "description": "Pixverse v4.5 is a video generation model capable of generating high-quality videos in under a minute. \nUse `--negative_prompt` to set the negative prompt. \nUse `--duration` to set the video duration (5 or 8 seconds). 
\nSet the resolution (360p, 540p, 720p, or 1080p) using `--resolution`. \nSend 1 image to perform an image-to-video task or a video effect generation task, and 2 images to perform a video transition task, using the first image as the first frame and the second image as the last frame. \nUse `--effect` to set the video generation effect, provided 1 image is given (options: `Kiss_Me_AI`, `Kiss`, `Muscle_Surge`, `Warmth_of_Jesus`, `Anything,_Robot`, `The_Tiger_Touch`, `Hug`, `Holy_Wings`, `Hulk`, `Venom`, `Microwave`). Use `--style` to set the video generation style (for text-to-video, image-to-video, and transition only; options: `anime`, `3d_animation`, `clay`, `comic`, `cyberpunk`). \nUse `--seed` to set the seed and `--aspect` to set the aspect ratio.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747737997951 } }, { "id": "flux-dev", "name": "flux-dev", "description": "High-performance image generation with top of the line prompt following, visual quality, image detail and output diversity. This is a more efficient version of FLUX-pro, balancing quality and speed. Use \"--aspect\" to select an aspect ratio (e.g. --aspect 1:1). Valid aspect ratios are 16:9, 4:3, 1:1, 3:4, 9:16. Send an image to have this model reimagine/regenerate it via FLUX Redux.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1722521612508 } }, { "id": "lyria", "name": "lyria", "description": "Google DeepMind's Lyria 2 delivers high-quality audio generation, capable of creating diverse soundscapes and musical pieces from text prompts.\n\nAllows users to specify elements to exclude in the audio using the \"--no\" parameter at the end of the prompt. Also supports \"--seed\" for deterministic generation, e.g. \"An energetic electronic dance track --no vocals, slow tempo --seed 123\". Lyria blocks prompts that name specific artists or songs (artist-intent and recitation checks). This bot does not support attachments. This bot accepts input prompts of up to 480 tokens.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749063911995 } }, { "id": "kling-1.6-pro", "name": "kling-1.6-pro", "description": "Kling v1.6 video generation bot, hosted by fal.ai. For best results, upload an image attachment.\nUse `--aspect` to set the aspect ratio. Allowed values are `16:9`, `9:16` and `1:1`. Use `--duration` to set the duration of the generated video (5 or 10 seconds).", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737537681579 } }, { "id": "clarity-upscaler", "name": "clarity-upscaler", "description": "Upscales images with high fidelity to the original image. Use \"--upscale_factor\" (value is a number between 1 and 4) to set the upscaled image's size (2 means the output image is 2x in size, etc.). 
\"--creativity\" and \"--clarity\" can be set between 0 and 1 to alter the faithfulness to the original image and the sharpness, respectively.\nThis bot supports .jpg and .png images.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736160594594 } }, { "id": "topazlabs", "name": "topazlabs", "description": "Topaz Labs’ image upscaler is a best-in-class generative AI model to increase overall clarity and the pixel amount of inputted photos — whether they be ones generated by AI image models and from the real world — while preserving the original photo’s contents. It can produce images of as small as ~10MB and as large as 512MB, depending on the size of the input photo. Specify --upscale and a number up to 16 to control the upscaling factor, output_height and/or output_width to specify the number of pixels for each dimension, and add --generated if the input photo is AI-generated. With no parameters specified, it will increase both input photo’s height and width by 2; especially effective on images of human faces.", "owned_by": "TopazLabs-Co", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 204, "metadata": { "source": "api", "tags": [], "created": 1733266151324 } }, { "id": "veo-v3.1", "name": "veo-v3.1", "description": "Google's Veo-3.1 is an improved version of Veo 3.\nUse `--aspect` to set the aspect ratio of the generated image (one of `16:9`, `9:16`).\nUse `--silent` to generate a silent video at a lower cost.\nUse --negative_prompt to set negative prompt option `blur`, `low resolution`, `poor quality`. (only for T2V).\nUse --duration to set the duration `4s`, `6s`, `8s`, default `8s`. `4s` and `6s` are only supported for text-to-video generation.\nPass a single image for image to video tasks. Pass two images for a first-frame-to-last-frame video generation task. Pass up to 3 images with `--reference` for a reference-to-video task. Reference images will be directly used in the video generation.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760568057558 } }, { "id": "veo-v3.1-fast", "name": "veo-v3.1-fast", "description": "Google's Veo 3.1 Fast is a fast version of Veo 3.1.\nUse `--aspect` to set the aspect ratio of the generated image (one of `16:9`, `9:16`).\nUse `--silent` to generate a silent video at a lower cost.\nUse --negative_prompt to set negative prompt option `blur`, `low resolution`, `poor quality`. (only for T2V).\nUse --duration to set the duration `4s`, `6s`, `8s`, default `8s`. `4s` and `6s` are only supported for text-to-video generation\nPass a single image for image to video tasks. Pass two images for a first-frame-to-last-frame video generation task.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760650990090 } }, { "id": "wan-2.2", "name": "wan-2.2", "description": "Wan-2.2 is a video model that generates high-quality videos with high visual quality and motion diversity from text prompts. Send one image for image to video tasks, and send two images for first-frame - last-frame generation. Use `--aspect` to set the aspect ratio (One of `16:9`, `1:1`, `9:16`) for text-to-video requests. 
Duration is limited to 5 seconds, at up to 720p resolution.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753731782474 } }, { "id": "ltx-2-fast", "name": "ltx-2-fast", "description": "LTX-2 Fast is a video model by Lightricks that delivers exceptional quality and speed. It can generate videos at up to 50 FPS in high resolutions and supports both text-to-video and image-to-video generation.\n\nOptional parameters:\nUse `--generate-audio` to generate audio with the video. This is disabled by default. \nPass resolution as `--resolution` with one of `1080p`, `1440p`, `2160p`. This is set to 1080p by default.\nSet the duration of the generated video with `--duration` (one of `6s`, `8s`, `10s`). This is set to 6s by default. Duration and resolution values will change the price. \nSet the fps of the generated video with `--fps`, to one of 25 or 50. This is set to 25 by default.\n\nFile attachments accepted: jpeg, png, webp", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761343254536 } }, { "id": "ltx-2-pro", "name": "ltx-2-pro", "description": "LTX-2 Pro is an advanced video generation model by Lightricks designed for professional-grade results. It offers high-quality, realistic video generation at exceptional speed and supports outputs up to 2K resolution. Perfect for both text-to-video and image-to-video creation, it delivers cinematic detail and smooth performance.\n\nOptional parameters:\nUse `--generate-audio` to generate audio with the video. This is disabled by default. \nPass resolution as `--resolution` with one of `1080p`, `1440p`, `2160p`. This is set to 1080p by default.\nSet the duration of the generated video with `--duration` (one of `6s`, `8s`, `10s`). This is set to 6s by default. Duration and resolution values will change the price. \nSet the fps of the generated video with `--fps`, to one of 25 or 50. This is set to 25 by default.\n\nFile attachments accepted: jpeg, png, webp", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761343366727 } }, { "id": "veo-3-vfast", "name": "veo-3-vfast", "description": "Veo-3 Fast is a faster and more cost-effective version of Google's Veo 3. \nUse `--aspect` to set the aspect ratio of the generated video (one of `16:9`, `1:1`, `9:16`). \nUse `--generate_audio` to generate audio with your video at a higher cost. \nUse `--negative_prompt` to set a negative prompt such as `blur`, `low resolution`, or `poor quality`. \nDuration is limited to 7 seconds. This is a text-to-video generation model only.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752140109634 } }, { "id": "vidu", "name": "vidu", "description": "The Vidu Video Generation Bot creates videos using images and text prompts. You can generate videos in four modes: \n(1) Image-to-Video: send 1 image with a prompt, \n(2) Start-to-End Frame: send 2 images with a prompt for transition videos, \n(3) Reference-to-Video: send up to 3 images with the `--reference` flag for guidance, and \n(4) Template-to-Video: use `--template` to apply pre-designed templates (1-3 images required, pricing varies by template). 
\n\nNumber of images required varies by template: `dynasty_dress` and `shop_frame` accept 1-2 images, `wish_sender` requires exactly 3 images, all other templates accept only 1 image.\n\nThe bot supports `--aspect` to set the aspect ratio (16:9, 1:1, 9:16) and `--movement-amplitude` to set the movement amplitude, and accepts PNG, JPEG, and WEBP formats. \nTasks are mutually exclusive (e.g., you cannot combine start-to-end frame and reference-to-video).\nDuration is limited to 5 seconds.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756292711841 } }, { "id": "vidu-q1", "name": "vidu-q1", "description": "The Vidu Q1 Video Generation Bot creates videos using text prompts and images. You can generate videos in three modes: \n(1) Text-to-Video: send a text prompt, \n(2) Image-to-Video: send 1 image with a prompt, and \n(3) Reference-to-Video: send up to 7 images with the `--reference` flag. \n\nThe bot supports `--aspect` to set the aspect ratio (16:9, 1:1, 9:16) and `--movement-amplitude` to set the movement amplitude; both can be customized for text-to-video and reference-to-video tasks. \nTasks are mutually exclusive (e.g., you cannot combine start-to-end frame and reference-to-video generation).\nThe bot accepts PNG, JPEG, and WEBP formats. Duration is limited to 5 seconds.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755797522439 } }, { "id": "veo-3-fast", "name": "veo-3-fast", "description": "Veo 3 Fast is a speed-optimized variant of Google’s Veo 3 AI video generation engine. It’s designed for rapid, cost-efficient production of short clips with synchronized audio (dialogue, ambient sound, effects). It prioritizes faster generation times while still delivering solid visual and audio quality, and supports text-to-video and image-to-video workflows, allowing creators to animate still images into motion sequences. Video length is constrained to 4, 6, or 8 seconds, specified via the --duration parameter (e.g. \"A cat dances --duration 6\" will produce a 6-second video). Please only upload photos that you own or have the right to use; otherwise the bot will throw an error.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1760335513509 } }, { "id": "seedance-1.0-pro-fast", "name": "seedance-1.0-pro-fast", "description": "Seedance Pro Fast is a faster version of Seedance 1.0 Pro that balances speed, quality and cost. Seedance is a video generation model with text-to-video and image-to-video capabilities. It achieves breakthroughs in semantic understanding and prompt following.\n\nOptional parameters:\nUse `--aspect` to set the aspect ratio (available values: `21:9`, `16:9`, `4:3`, `1:1`, `3:4`, `9:16`). Set to `16:9` as default.\nUse `--resolution` (one of `480p`, `720p`, `1080p`) to set the video resolution. Set to `1080p` as default.\n`--duration` (3 to 12) sets the video duration. 
Set to `5s` as default.\nNumber of video tokens calculated for pricing is approximately `height * width * fps * duration / 1024`.\n\nFile attachments accepted: jpeg, png, webp", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761334162620 } }, { "id": "sora", "name": "sora", "description": "Sora is OpenAI's video generation model. Use `--duration` to set the duration of the generated video, and `--resolution` to set the video's resolution (480p, 720p, or 1080p). Set the aspect ratio of the generated video with `--aspect` (valid aspect ratios are 16:9, 1:1, 9:16). This is a text-to-video model only.\n\nSwitch to the newest models for improved video and audio creation: https://poe.com/Sora-2-Pro for cinematic excellence or https://poe.com/Sora-2 for unmatched realism and precision.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749552672238 } }, { "id": "omnihuman", "name": "omnihuman", "description": "OmniHuman, by Bytedance, generates video using an image of a human figure paired with an audio file. It produces vivid, high-quality videos where the character’s emotions and movements maintain a strong correlation with the audio. Send an image including a human figure with a visible face, and an audio file, and the bot will return a video. The maximum audio length accepted is 30 seconds.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753875678785 } }, { "id": "bagoodex-web-search", "name": "bagoodex-web-search", "description": "Bagoodex delivers real-time AI-powered web search offering instant access to videos, images, weather, and more. Audio and video uploads are not supported at this time.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753947757043 } }, { "id": "deep-ai-search", "name": "deep-ai-search", "description": "Deep search engine integrating Brave AI with real-time web search. This chatbot executes commands and scrapes websites at scale while preserving its hallmark speed advantage. The bot doesn't accept file attachments.", "owned_by": "Open-Hat-Lab", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753262625533 } }, { "id": "kling-avatar-pro", "name": "kling-avatar-pro", "description": "Create lifelike avatar videos featuring realistic humans, animals, cartoons, or stylized characters. Simply upload an image and an audio file to generate a video of your character speaking.\n\nSupported file formats:\nImages: JPEG, PNG, WEBP\nAudio: MP3, WAV", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758919888726 } }, { "id": "playai-dialog", "name": "playai-dialog", "description": "Generates dialogues based on your script using PlayHT's text-to-speech model, in the voices of your choice. Use --speaker_1 [voice_name] and --speaker_2 [voice_name] to choose the voices from the list below. Voice defaults to `Jennifer_(English_(US)/American)`. 
Follow the below format while prompting (case sensitive):\nFORMAT:\n```\nSpeaker 1: ......\nSpeaker 2: ......\nSpeaker 1: ......\nSpeaker 2: ......\n--speaker_1 [voice_1] --speaker_2 [voice_2]\n```\nVOICES AVAILABLE:\nJennifer_(English_(US)/American)\nDexter_(English_(US)/American)\nAva_(English_(AU)/Australian)\nTilly_(English_(AU)/Australian)\nCharlotte_(Advertising)_(English_(CA)/Canadian)\nCharlotte_(Meditation)_(English_(CA)/Canadian)\nCecil_(English_(GB)/British)\nSterling_(English_(GB)/British)\nCillian_(English_(IE)/Irish)\nMadison_(English_(IE)/Irish)\nAda_(English_(ZA)/South_African)\nFurio_(English_(IT)/Italian)\nAlessandro_(English_(IT)/Italian)\nCarmen_(English_(MX)/Mexican)\nSumita_(English_(IN)/Indian)\nNavya_(English_(IN)/Indian)\nBaptiste_(English_(FR)/French)\nLumi_(English_(FI)/Finnish)\nRonel_Conversational_(Afrikaans/South_African)\nRonel_Narrative_(Afrikaans/South_African)\nAbdo_Conversational_(Arabic/Arabic)\nAbdo_Narrative_(Arabic/Arabic)\nMousmi_Conversational_(Bengali/Bengali)\nMousmi_Narrative_(Bengali/Bengali)\nCaroline_Conversational_(Portuguese_(BR)/Brazilian)\nCaroline_Narrative_(Portuguese_(BR)/Brazilian)\nAnge_Conversational_(French/French)\nAnge_Narrative_(French/French)\nAnke_Conversational_(German/German)\nAnke_Narrative_(German/German)\nBora_Conversational_(Greek/Greek)\nBora_Narrative_(Greek/Greek)\nAnuj_Conversational_(Hindi/Indian)\nAnuj_Narrative_(Hindi/Indian)\nAlessandro_Conversational_(Italian/Italian)\nAlessandro_Narrative_(Italian/Italian)\nKiriko_Conversational_(Japanese/Japanese)\nKiriko_Narrative_(Japanese/Japanese)\nDohee_Conversational_(Korean/Korean)\nDohee_Narrative_(Korean/Korean)\nIgnatius_Conversational_(Malay/Malay)\nIgnatius_Narrative_(Malay/Malay)\nAdam_Conversational_(Polish/Polish)\nAdam_Narrative_(Polish/Polish)\nAndrei_Conversational_(Russian/Russian)\nAndrei_Narrative_(Russian/Russian)\nAleksa_Conversational_(Serbian/Serbian)\nAleksa_Narrative_(Serbian/Serbian)\nCarmen_Conversational_(Spanish/Spanish)\nPatricia_Conversational_(Spanish/Spanish)\nAiken_Conversational_(Tagalog/Filipino)\nAiken_Narrative_(Tagalog/Filipino)\nKatbundit_Conversational_(Thai/Thai)\nKatbundit_Narrative_(Thai/Thai)\nAli_Conversational_(Turkish/Turkish)\nAli_Narrative_(Turkish/Turkish)\nSahil_Conversational_(Urdu/Pakistani)\nSahil_Narrative_(Urdu/Pakistani)\nMary_Conversational_(Hebrew/Israeli)\nMary_Narrative_(Hebrew/Israeli)\n\nPrompt input cannot exceed 10,000 characters.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1737460623400 } }, { "id": "luma-photon", "name": "luma-photon", "description": "Luma Photon delivers industry-specific visual excellence, crafting images that align perfectly with professional standards - not just generic AI art. From marketing to creative design, each generation is purposefully tailored to your industry's unique requirements. Add --aspect to the end of your prompts to change the aspect ratio of your generations (1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21 are supported). Prompt input cannot exceed 5,000 characters.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733181326256 } }, { "id": "ideogram", "name": "ideogram", "description": "Excels at creating high-quality images from text prompts. For most prompts, https://poe.com/Ideogram-v2 will produce better results. 
Allows users to specify the aspect ratio of the image using the \"--aspect\" parameter at the end of the prompt (e.g. \"Tall trees, daylight --aspect 9:16\"). Valid aspect ratios are 10:16, 16:10, 9:16, 16:9, 3:2, 2:3, 4:3, 3:4, and 1:1.", "owned_by": "IdeogramAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 150, "metadata": { "source": "api", "tags": [], "created": 1712178346331 } }, { "id": "seededit-3.0", "name": "seededit-3.0", "description": "SeedEdit 3.0 is an image editing model independently developed by ByteDance. It excels at accurately following editing instructions and effectively preserving image content, and is especially strong with real images. Please send an image with a prompt to edit the image.", "owned_by": "Bytedance", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754502655602 } }, { "id": "kling-2.1-pro", "name": "kling-2.1-pro", "description": "Kling 2.1 Pro is an advanced endpoint for the Kling 2.1 model, offering professional-grade videos with enhanced visual fidelity, precise camera movements, and dynamic motion control, perfect for cinematic storytelling. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Set video duration to one of `5` or `10` seconds with `--duration`. Requires an image attachment.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748544740987 } }, { "id": "kling-2.1-std", "name": "kling-2.1-std", "description": "Kling 2.1 Standard is a cost-efficient endpoint for the Kling 2.1 model, delivering high-quality image-to-video generation. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Set video duration to one of `5` or `10` seconds with `--duration`.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748545509401 } }, { "id": "runway-gen-4-turbo", "name": "runway-gen-4-turbo", "description": "Runway's Gen-4 Turbo model creates best-in-class, controllable, and high-fidelity video generations based on your prompts. Both text inputs (max 1000 characters) and image inputs are supported, but we recommend using image inputs for best results. Use --aspect_ratio (16:9, 1:1, 9:16, landscape, portrait) for landscape/portrait videos. Use --duration (5, 10) to specify video length in seconds. Full prompting guide here: https://help.runwayml.com/hc/en-us/articles/39789879462419-Gen-4-Video-Prompting-Guide", "owned_by": "RunwayML", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1746825004531 } }, { "id": "runway", "name": "runway", "description": "Runway's Gen-3 Alpha Turbo model creates best-in-class, controllable, and high-fidelity video generations based on your prompts. Both text inputs (max 1000 characters) and image inputs are supported, but we recommend using image inputs for best results. Use --aspect_ratio (16:9, 9:16, landscape, portrait) for landscape/portrait videos. Use --duration (5, 10) to specify video length in seconds. 
", "owned_by": "RunwayML", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1728610474100 } }, { "id": "veo-2", "name": "veo-2", "description": "Veo 2 creates incredibly high-quality videos in a wide range of subjects and styles. It brings an improved understanding of real-world physics and the nuances of human movement and expression, which helps improve its detail and realism overall. Veo 2 understands the unique language of cinematography: ask it for a genre, specify a lens, suggest cinematic effects and Veo 2 will deliver in 8-second clips. Use --aspect-ratio (16:9 or 9:16) to customize video aspect ratio. Supports text-to-video as well as image-to-video. Non english input will be translated first. Note: currently has low rate limit so you may need to retry your request at times of peak usage.", "owned_by": "Google", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 480, "metadata": { "source": "api", "tags": [], "created": 1733117805122 } }, { "id": "dream-machine", "name": "dream-machine", "description": "Luma AI's Dream Machine is an AI model that makes high-quality, realistic videos fast from text and images. Iterate at the speed of thought, create action-packed shots, and dream worlds with consistent characters on Poe today!\n\nTo specify the aspect ratio of your video add --aspect_ratio (1:1, 16:9, 9:16, 4:3, 3:4, 21:9, 9:21). To loop your video add --loop True.", "owned_by": "LumaLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 5000, "metadata": { "source": "api", "tags": [], "created": 1726690715197 } }, { "id": "kling-2.0-master", "name": "kling-2.0-master", "description": "Generate high-quality videos from text or images using Kling 2.0 Master. Use `--negative_prompt` to send a negative prompt, and `--cfg_scale` to send a classifier-free guidance scale between 0.0 and 1.0 (inclusive). Use `--aspect` to set the aspect ratio (One of `16:9`, `9:16` and `1:1`). Use `--duration` to set either 5 or 10 second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1744698597290 } }, { "id": "qwen-edit", "name": "qwen-edit", "description": "Image editing model based on Qwen-Image, with superior text editing capabilities.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755628345426 } }, { "id": "gptzero", "name": "gptzero", "description": "GPTZero is a deep-learning-driven platform designed to analyze and flag portions of text that are likely generated by AI vs. human authors. It distinguishes between “entirely human,” “entirely AI,” or “mixed” content and highlights the specific sentences involved.\n\n*Max number of files that can submitted simultaneously is 50, and the max file size for all files combined is 15 MB. Each file's document will be truncated to 50,000 characters.\n\nSupported file types: PDF, DOC/DOCX, TXT, ODT\n\nParameter controls available:\n1. 
Detection Options\n - Multilingual (FR/ES):\n - `--multilingual true` (Enables the GPTZero multilingual model)\n - `--multilingual false` (Default/Disabled)\n - Model Version:\n - `--modelVersion [version_string]` (Selects a specific GPTZero model version, e.g., '2025-10-30-base')\n - `--modelVersion __latest__` (Default: Automatically uses the latest model version)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761948657951 } }, { "id": "kling-pro-effects", "name": "kling-pro-effects", "description": "Generate videos with effects like squishing an object, two people hugging, making heart gestures, etc. using Kling-Pro-Effects. Requires an image input. Send a single image for `squish` and `expansion` effects and two images (of people) for `hug`, `kiss`, and `heart_gesture` effects. Set the effect with `--effect`. Default effect: `squish`.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1743698583798 } }, { "id": "hailuo-live", "name": "hailuo-live", "description": "Hailuo Live, the latest model from Minimax, sets a new standard for bringing still images to life. From breathtakingly vivid motion to finely tuned expressions, this state-of-the-art model enables your characters to captivate, move, and shine like never before. It excels at bringing art and drawings to life, with exceptional realism without morphing, a wide emotional range, and unparalleled character consistency. Generates a 5-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1734370063740 } }, { "id": "hailuo-ai", "name": "hailuo-ai", "description": "Best-in-class text and image to video model by MiniMax.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729194728486 } }, { "id": "ray2", "name": "ray2", "description": "Ray2 is a large-scale video generative model capable of creating realistic visuals with natural, coherent motion. It has a strong understanding of text instructions and can also take image input. Can produce videos from 540p up to 4K resolution, with durations of either 5 or 9 seconds.", "owned_by": "LumaLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 5000, "metadata": { "source": "api", "tags": [], "created": 1740094898040 } }, { "id": "veo-2-video", "name": "veo-2-video", "description": "Veo2 is Google's cutting-edge video generation model. Veo creates videos with realistic motion and high-quality output.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740172728462 } }, { "id": "wan-2.1", "name": "wan-2.1", "description": "Wan-2.1 is a text-to-video and image-to-video model that generates videos with high visual quality and motion diversity from text prompts. 
Generates a 5-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1741001573656 } }, { "id": "ideogram-v2a-turbo", "name": "ideogram-v2a-turbo", "description": "Fast, affordable text-to-image model, optimized for graphic design and photography. For higher quality, use https://poe.com/Ideogram-v2A\nUse `--aspect` to set the aspect ratio, and use `--style` to specify a style (one of `GENERAL`, `REALISTIC`, `DESIGN`, `3D RENDER`, and `ANIME`; default: `GENERAL`).", "owned_by": "IdeogramAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 150, "metadata": { "source": "api", "tags": [], "created": 1740678577836 } }, { "id": "ideogram-v2a", "name": "ideogram-v2a", "description": "Fast, affordable text-to-image model, optimized for graphic design and photography. For faster and more cost-effective generations, use https://poe.com/Ideogram-v2A-Turbo\nUse `--aspect` to set the aspect ratio, and use `--style` to specify a style (one of `GENERAL`, `REALISTIC`, `DESIGN`, `3D RENDER`, and `ANIME`; default: `GENERAL`).", "owned_by": "IdeogramAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 150, "metadata": { "source": "api", "tags": [], "created": 1740678539688 } }, { "id": "trellis-3d", "name": "trellis-3d", "description": "Generate 3D models from your images using Trellis, a native 3D generative model enabling versatile and high-quality 3D asset creation. Send an image to convert it into a 3D model.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1743054517902 } }, { "id": "flux-dev-finetuner", "name": "flux-dev-finetuner", "description": "Fine-tune the FLUX dev model with your own pictures! Upload 8-12 of them (same subject, only one subject per picture, ideally in different poses and against different backgrounds) and wait ~2-5 minutes to create your own finetuned bot that will generate pictures of this subject in whatever setting you want.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1727479142160 } }, { "id": "flux-inpaint", "name": "flux-inpaint", "description": "Given an image and a mask (separate images), fills in the region of the image given by the mask as per the prompt. The base image should be the first image attached and the black-and-white mask should be the second image; a text prompt is required and should specify what you want the model to inpaint in the white area of the mask.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736797755390 } }, { "id": "flux-fill", "name": "flux-fill", "description": "Given an image and a mask (separate images), fills in the region of the image given by the mask as per the prompt. 
The base image should be the first image attached and the black-and-white mask should be the second image; a text prompt is required and should specify what you want the model to inpaint in the white area of the mask.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736787123399 } }, { "id": "bria-eraser", "name": "bria-eraser", "description": "Bria Eraser enables precise removal of unwanted objects from images while maintaining high-quality outputs. Trained exclusively on licensed data for safe and risk-free commercial use. Send an image and a black-and-white mask image denoting the objects to be cleared out from the image. The input prompt is only used to create the filename of the output image.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1739957916196 } }, { "id": "aya-vision", "name": "aya-vision", "description": "Aya Vision is a 32B open-weights multimodal model with advanced capabilities optimized for a variety of vision-language use cases. It is trained to excel in 23 languages in both vision and text: Arabic, Chinese (simplified & traditional), Czech, Dutch, English, French, German, Greek, Hebrew, Hindi, Indonesian, Italian, Japanese, Korean, Persian, Polish, Portuguese, Romanian, Russian, Spanish, Turkish, Ukrainian, and Vietnamese.", "owned_by": "Cohere", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1741042614242 } }, { "id": "kling-1.5-pro", "name": "kling-1.5-pro", "description": "Kling v1.5 video generation bot, hosted by fal.ai. For best results, upload an image attachment. Use `--aspect` to set the aspect ratio. Allowed values are `16:9`, `9:16` and `1:1`. Use `--duration` to set the duration of the generated video (5 or 10 seconds).", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733347438699 } }, { "id": "deepreasoning", "name": "deepreasoning", "description": "DeepReasoning (previously DeepClaude) is a high-performance LLM inference that combines DeepSeek R1's Chain of Thought (CoT) reasoning capabilities with Anthropic Claude's creative and code generation prowess. It provides a unified interface for leveraging the strengths of both models while maintaining complete control over your data. Learn more: https://deepclaude.com/", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740454833334 } }, { "id": "gemma-3-27b", "name": "gemma-3-27b", "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. 
Gemma 3 27B is Google's latest open-source model and the successor to Gemma 2.", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742186137210 } }, { "id": "qwen3-32b-cs", "name": "qwen3-32b-cs", "description": "World’s fastest inference for Qwen 3 32B with Cerebras.", "owned_by": "CerebrasAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747326165823 } }, { "id": "qwen-2.5-vl-32b", "name": "qwen-2.5-vl-32b", "description": "Qwen2.5-VL-32B's mathematical and problem-solving capabilities have been strengthened through reinforcement learning, leading to a significantly improved user experience. The model's response styles have been refined to better align with human preferences, particularly for objective queries involving mathematics, logical reasoning, and knowledge-based Q&A. As a result, responses now feature greater detail, improved clarity, and enhanced formatting.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1743550499150 } }, { "id": "qwen2.5-vl-72b-t", "name": "qwen2.5-vl-72b-t", "description": "Qwen 2.5 VL 72B, a cutting-edge multimodal model from the Qwen Team, excels in visual and video understanding, multilingual text/image processing (including Japanese, Arabic, and Korean), and dynamic agentic reasoning for automation. It supports long-context comprehension (32K tokens).", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1743431047831 } }, { "id": "mistral-small-3", "name": "mistral-small-3", "description": "Mistral Small 3 is a pre-trained and instructed model catered to the ‘80%’ of generative AI tasks: those that require robust language and instruction-following performance with very low latency. Released under an Apache 2.0 license and comparable to Llama-3.3-70B and Qwen2.5-32B-Instruct.", "owned_by": "Mistral", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 0.09999999999999999, "currency": "USD" }, "output": { "per_million_tokens": 0.3, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1738360161146 } }, { "id": "deepseek-v3-di", "name": "deepseek-v3-di", "description": "Deepseek-v3 – the new top open-source LLM. Achieves state-of-the-art performance in tasks such as coding, mathematics, and reasoning. All data you submit to this bot is governed by the Poe privacy policy and is only sent to DeepInfra, a US-based company.\n\nSupports 64k tokens of input context and 8k tokens of output context. Quantization: FP8 (official).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 64000, "metadata": { "source": "api", "tags": [], "created": 1739797458982 } }, { "id": "grok-2", "name": "grok-2", "description": "Grok 2 is xAI's latest and most intelligent language model. It features state-of-the-art capabilities in coding, reasoning, and answering questions. It excels at handling complex and multi-step tasks. 
Grok 2 does not have access to real-time information from X or the internet as part of its integration with Poe.", "owned_by": "XAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "pricing": { "input": { "per_million_tokens": 2, "currency": "USD" }, "output": { "per_million_tokens": 10, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1736893314102 } }, { "id": "deepseek-v3-turbo-di", "name": "deepseek-v3-turbo-di", "description": "Deepseek-v3 – the new top open-source LLM. Achieves state-of-the-art performance in tasks such as coding, mathematics, and reasoning. The Turbo variant is quantized to achieve higher speeds. All data you submit to this bot is governed by the Poe privacy policy and is only sent to DeepInfra, a US-based company.\n\nSupports 32k tokens of input context and 8k tokens of output context. Quantization: FP4 (turbo).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "metadata": { "source": "api", "tags": [], "created": 1741250579199 } }, { "id": "phi-4-di", "name": "phi-4-di", "description": "Microsoft Research Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed.\n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nAll data you provide this bot will not be used in training, and is sent only to DeepInfra, a US-based company.\n\nSupports 16k tokens of input context and 8k tokens of output context. Quantization: FP16 (official).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16000, "metadata": { "source": "api", "tags": [], "created": 1740490334949 } }, { "id": "mistral-7b-v0.3-di", "name": "mistral-7b-v0.3-di", "description": "Mistral Instruct 7B v0.3 from Mistral AI.\n\nAll data you provide this bot will not be used in training, and is sent only to DeepInfra, a US-based company.\n\nSupports 32k tokens of input context and 8k tokens of output context. Quantization: FP16 (official).", "owned_by": "DeepInfra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32000, "metadata": { "source": "api", "tags": [], "created": 1740490886743 } }, { "id": "aya-expanse-32b", "name": "aya-expanse-32b", "description": "Aya Expanse is a 32B open-weight research release of a model with highly advanced multilingual capabilities. Aya supports state-of-the-art generative capabilities in 23 languages: Arabic, Chinese (simplified & traditional), Czech, Dutch, English, French, German, Greek, Hebrew, Hindi, Indonesian, Italian, Japanese, Korean, Persian, Polish, Portuguese, Romanian, Russian, Spanish, Turkish, Ukrainian, and Vietnamese.", "owned_by": "Cohere", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1739905182986 } }, { "id": "liveportrait", "name": "liveportrait", "description": "Animates given portraits with the motions in the provided video. 
Powered by fal.ai", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1720556185003 } }, { "id": "llama-3.1-8b-t-128k", "name": "llama-3.1-8b-t-128k", "description": "Llama 3.1 8B Instruct from Meta. Supports 128k tokens of context.\n\nThe points price is subject to change.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1721748216574 } }, { "id": "stablediffusion3-2b", "name": "stablediffusion3-2b", "description": "Stable Diffusion v3 Medium - by fal.ai", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1718216691252 } }, { "id": "mixtral8x22b-inst-fw", "name": "mixtral8x22b-inst-fw", "description": "Mixtral 8x22B Mixture-of-Experts instruct model from Mistral hosted by Fireworks. ", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "metadata": { "source": "api", "tags": [], "created": 1712949013942 } }, { "id": "mistral-large-2", "name": "mistral-large-2", "description": "Mistral's latest text generation model (Mistral-Large-2407) with top-tier reasoning capabilities. It can be used for complex multilingual reasoning tasks, including text understanding, transformation, and code generation. This bot has the full 128k context window supported by the model.", "owned_by": "Mistral", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 3, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1708971504266 } }, { "id": "reka-core", "name": "reka-core", "description": "Reka's largest and most capable multimodal language model. Works with text, images, and video inputs. 8k context length.", "owned_by": "Reka AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1713038207102 } }, { "id": "reka-flash", "name": "reka-flash", "description": "Reka's efficient and capable 21B multimodal model optimized for fast workloads and amazing quality. Works with text, images and video inputs.", "owned_by": "Reka AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1707892216404 } }, { "id": "claude-sonnet-3.5-june", "name": "claude-sonnet-3.5-june", "description": "Anthropic's legacy Sonnet 3.5 model, specifically the June 2024 snapshot (for the latest, please use https://poe.com/Claude-Sonnet-3.5). 
Excels in complex tasks like coding, writing, analysis, and visual processing; it is generally more verbose than the more concise October 2024 snapshot.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 2.6, "currency": "USD" }, "output": { "per_million_tokens": 13, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1731966954824 } }, { "id": "sketch-to-image", "name": "sketch-to-image", "description": "Takes in sketches and converts them to colored images.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736176125104 } }, { "id": "qwen2.5-coder-32b", "name": "qwen2.5-coder-32b", "description": "Qwen2.5-Coder is the latest series of code-specific Qwen large language models (formerly known as CodeQwen), developed by Alibaba.", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1731698228854 } }, { "id": "stablediffusion3.5-t", "name": "stablediffusion3.5-t", "description": "Faster version of Stable Diffusion 3 Large, hosted by @fal. Excels at fast image generation. Use \"--aspect\" to select an aspect ratio (e.g., --aspect 1:1).", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729817429663 } }, { "id": "flux-pro-1.1-t", "name": "flux-pro-1.1-t", "description": "BFL's state-of-the-art image model. FLUX 1.1 Pro generates images six times faster than its predecessor, FLUX 1 Pro, while also improving image quality, prompt adherence, and output diversity.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1730863432942 } }, { "id": "flux-schnell-t", "name": "flux-schnell-t", "description": "Lightning-fast AI image generation model that excels in producing high-quality visuals in just seconds. Great for quick prototyping or real-time use cases. This is the fastest version of FLUX.1.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1730862046687 } }, { "id": "recraft-v3", "name": "recraft-v3", "description": "Recraft V3, state-of-the-art image generation. Prompt input cannot exceed 1,000 characters.\nUse --style for styles, and --aspect for aspect ratio configuration (16:9, 4:3, 1:1, 3:4, 9:16). 
\nAvailable styles: realistic_image, digital_illustration, vector_illustration, realistic_image/b_and_w, realistic_image/hard_flash, realistic_image/hdr, realistic_image/natural_light, realistic_image/studio_portrait, realistic_image/enterprise, realistic_image/motion_blur, digital_illustration/pixel_art, digital_illustration/hand_drawn, digital_illustration/grain, digital_illustration/infantile_sketch, digital_illustration/2d_art_poster, digital_illustration/handmade_3d, digital_illustration/hand_drawn_outline, digital_illustration/engraving_color, digital_illustration/2d_art_poster_2, vector_illustration/engraving, vector_illustration/line_art, vector_illustration/line_circuit, vector_illustration/linocut", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1730322043217 } }, { "id": "llama-3-70b-t", "name": "llama-3-70b-t", "description": "Llama 3 70B Instruct from Meta. For most use cases, https://poe.com/Llama-3.3-70B will perform better.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1713463834064 } }, { "id": "gpt-4o-aug", "name": "gpt-4o-aug", "description": "OpenAI's most powerful model, GPT-4o, using the August 2024 model snapshot. Stronger than GPT-3.5 in quantitative questions (math and physics), creative writing, and many other challenging tasks. \nCheck out the newest version of this bot here: https://poe.com/GPT-5.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "pricing": { "input": { "per_million_tokens": 2.2, "currency": "USD" }, "output": { "per_million_tokens": 9, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1732149774348 } }, { "id": "gpt-4-classic-0314", "name": "gpt-4-classic-0314", "description": "OpenAI's GPT-4 model. Powered by gpt-4-0314 (non-Turbo) for text input and gpt-4o for image input. For most use cases, https://poe.com/GPT-4o will perform significantly better.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 27, "currency": "USD" }, "output": { "per_million_tokens": 54, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1724707714433 } }, { "id": "gpt-4-classic", "name": "gpt-4-classic", "description": "OpenAI's GPT-4 model. Powered by gpt-4-0613 (non-Turbo) for text input and gpt-4o for image input. \nCheck out the newest version of this bot here: https://poe.com/GPT-5.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "pricing": { "input": { "per_million_tokens": 27, "currency": "USD" }, "output": { "per_million_tokens": 54, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1711404454811 } }, { "id": "solar-pro-2", "name": "solar-pro-2", "description": "Solar Pro 2 is Upstage's latest frontier-scale LLM. With just 31B parameters, it delivers top-tier performance through world-class multilingual support, advanced reasoning, and real-world tool use. Especially in Korean, it outperforms much larger models across critical benchmarks. Built for the next generation of practical LLMs, Solar Pro 2 proves that smaller models can still lead. 
Supports a context length of 64k tokens.", "owned_by": "Upstage", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "metadata": { "source": "api", "tags": [], "created": 1694610718864 } }, { "id": "remove-background", "name": "remove-background", "description": "Remove the background from your images", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1714848450172 } }, { "id": "sana-t2i", "name": "sana-t2i", "description": "SANA can synthesize high-resolution, high-quality images at a remarkably fast rate, with the ability to generate 4K images in less than a second.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736139178094 } }, { "id": "mistral-7b-v0.3-t", "name": "mistral-7b-v0.3-t", "description": "Mistral Instruct 7B v0.3 from Mistral AI.\n\nThe points price is subject to change.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1716798156279 } }, { "id": "tako", "name": "tako", "description": "Tako is a bot that transforms your questions about stocks, sports, economics, or politics into interactive, shareable knowledge cards from trusted sources. Tako's knowledge graph is built exclusively from authoritative, real-time data providers, and is embeddable in your apps, research, and storytelling. You can adjust the specificity threshold by typing `--specificity 30` (or a value between 0 - 100) at the end of your query/question; the default is 60.", "owned_by": "TryTako", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 2048, "metadata": { "source": "api", "tags": [], "created": 1723756137465 } }, { "id": "llama-3.1-405b-fp16", "name": "llama-3.1-405b-fp16", "description": "The biggest and best open-source AI model trained by Meta, beating GPT-4o across most benchmarks. 
This bot runs in BF16 with a 128K context length.", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "metadata": { "source": "api", "tags": [], "created": 1724034411290 } }, { "id": "llama-3.1-8b-fp16", "name": "llama-3.1-8b-fp16", "description": "The smallest and fastest member of the Llama 3.1 family, offering exceptional efficiency and rapid response times, with a 128K context length.", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "metadata": { "source": "api", "tags": [], "created": 1724034517400 } }, { "id": "llama-3.1-70b-fp16", "name": "llama-3.1-70b-fp16", "description": "The best LLM at its size, with faster response times than the 405B model and a 128K context length.", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "metadata": { "source": "api", "tags": [], "created": 1724034470327 } }, { "id": "llama-3-70b-fp16", "name": "llama-3-70b-fp16", "description": "A highly efficient and powerful model designed for a variety of tasks, with a 128K context length.", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "metadata": { "source": "api", "tags": [], "created": 1724034563488 } }, { "id": "restyler", "name": "restyler", "description": "This bot enables rapid transformation of existing images, delivering high-quality style transfers and image modifications. Takes in a text input and an image attachment. Use --strength to control the guidance given by the initial image, with higher values adhering to the image more strongly.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1739302186273 } }, { "id": "stablediffusionxl", "name": "stablediffusionxl", "description": "Generates high-quality images based on the user's most recent prompt. \n\nAllows users to specify elements to avoid in the image using the \"--no\" parameter at the end of the prompt. Select an aspect ratio with \"--aspect\". (e.g. \"Tall trees, daylight --no rain --aspect 7:4\"). Valid aspect ratios are 1:1, 7:4, 4:7, 9:7, 7:9, 19:13, 13:19, 12:5, & 5:12. \n\nPowered by Stable Diffusion XL.", "owned_by": "StabilityAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 200, "metadata": { "source": "api", "tags": [], "created": 1688868065472 } }, { "id": "qwen-2.5-7b-t", "name": "qwen-2.5-7b-t", "description": "Qwen 2.5 7B from Alibaba. Excels in coding, math, instruction following, and natural language understanding, and has great multilingual support with more than 29 languages.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1730863674687 } }, { "id": "qwen-2.5-72b-t", "name": "qwen-2.5-72b-t", "description": "Qwen 2.5 72B from Alibaba. Excels in coding, math, instruction following, and natural language understanding, and has great multilingual support with more than 29 languages. 
\n\nIt delivers results on par with Llama-3-405B despite using only one-fifth of the parameters.", "owned_by": "Together AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1730863910082 } }, { "id": "python", "name": "python", "description": "Executes Python code (version 3.11) from the user message and outputs the results. If there are code blocks in the user message (surrounded by triple backticks), then only the code blocks will be executed. These libraries are imported into this bot's run-time automatically (numpy, pandas, requests, matplotlib, scikit-learn, torch, PyYAML, tensorflow, scipy, pytest), along with ~150 of the most widely used Python libraries.", "owned_by": "Poe", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131000, "metadata": { "source": "api", "tags": [], "created": 1724756919380 } }, { "id": "markitdown", "name": "markitdown", "description": "Convert anything to Markdown: URLs, PDFs, Word, Excel, PowerPoint, images (EXIF metadata), audio (EXIF metadata and transcription), and more. This bot wraps Microsoft’s MarkItDown MCP server (https://github.com/microsoft/markitdown).", "owned_by": "OpenTools", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1746488364378 } }, { "id": "flux-1-schnell-fw", "name": "flux-1-schnell-fw", "description": "FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.\n\nKey Features\n1. Cutting-edge output quality and competitive prompt following, matching the performance of closed-source alternatives.\n2. Trained using latent adversarial diffusion distillation, FLUX.1 [schnell] can generate high-quality images in only 1 to 4 steps.\n3. Released under the Apache 2.0 license, the model can be used for personal, scientific, and commercial purposes.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1729619977045 } }, { "id": "flux-1-dev-fw", "name": "flux-1-dev-fw", "description": "FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.\n\nKey Features\n1. Cutting-edge output quality, second only to our state-of-the-art model FLUX.1 [pro].\n2. Competitive prompt following, matching the performance of closed-source alternatives.\n3. Trained using guidance distillation, making FLUX.1 [dev] more efficient.\n4. Open weights to drive new scientific research, and empower artists to develop innovative workflows.\n5. Generated outputs can be used for personal, scientific, and commercial purposes as described in the FLUX.1 [dev] Non-Commercial License.", "owned_by": "Fireworks AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 256, "metadata": { "source": "api", "tags": [], "created": 1729618505818 } }, { "id": "mochi-preview", "name": "mochi-preview", "description": "Open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence. Supports both text-to-video and image-to-video. 
Generates a 5-second video.", "owned_by": "fal", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729817676311 } }, { "id": "gpt-3.5-turbo-raw", "name": "gpt-3.5-turbo-raw", "description": "Powered by gpt-3.5-turbo without a system prompt.\nCheck out the newest version of this bot here: https://poe.com/GPT-5.", "owned_by": "OpenAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4524, "pricing": { "input": { "per_million_tokens": 0.44999999999999996, "currency": "USD" }, "output": { "per_million_tokens": 1.4, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1695849978857 } }, { "id": "interpreter", "name": "interpreter", "description": "Interpreter for Poe Python", "owned_by": "Poe Tools", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1715753807980 } }, { "id": "claude-haiku-3", "name": "claude-haiku-3", "description": "Anthropic's Claude Haiku 3 outperforms models in its intelligence category on performance, speed, and cost without the need for specialized fine-tuning. The compute points value is subject to change. For most use cases, https://poe.com/Claude-Haiku-3.5 will be better.", "owned_by": "Anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 189096, "pricing": { "input": { "per_million_tokens": 0.21, "currency": "USD" }, "output": { "per_million_tokens": 1.1, "currency": "USD" } }, "metadata": { "source": "api", "tags": [], "created": 1709942726436 } }, { "id": "code-saver", "name": "code-saver", "description": "A system bot that handles Poe scripts in chat.", "owned_by": "Poe Tools", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754063583549 } }, { "id": "code-editor", "name": "code-editor", "description": "Official code editor for Poe Scripting using Python, used to connect multiple Poe bots and create AI workflows. Guide and tips: https://creator.poe.com/docs/script-bots/poe-python-reference", "owned_by": "Poe Tools", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748902288662 } }, { "id": "exa-answer", "name": "exa-answer", "description": "Get a quick LLM-style answer to a question informed by Exa search results. \n\nFor more in-depth results, consider using the following endpoint: https://poe.com/Exa-Research\n\nParameter Controls Available:\n- `--text false/true` Show text snippets under each source citation (default: false)", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764188987964 } }, { "id": "exa-search", "name": "exa-search", "description": "Utilize Exa's technology for searching web pages, finding similar web pages, crawling, and more.\nNote: This endpoint does not return an LLM-style response (visit the following if you want an LLM-style response: https://poe.com/Exa-Answer or https://poe.com/Exa-Research). File upload is not supported. \n\nParameter Controls Available:\n1. 
Operation Mode\n - Default: `--operation search` (web search)\n - For finding similar pages: `--operation similar`\n - For getting page contents: `--operation contents`\n - For code search: `--operation code`\n\n2. Basic Search Controls (search & similar operations)\n - `--num_results [1-100]` number of results to return (default: 10)\n - `--search_type [auto|neural|deep|fast]` search algorithm (default: auto)\n - `--category [company|research paper|news|pdf|github|tweet|personal site|linkedin profile|financial report]`\n - `--show_content` display full page content in results\n\n3. Domain & Text Filters (search operation)\n - `--include_domains domain1.com,domain2.com` comma-separated domains to include\n - `--exclude_domains spam.com,unwanted.org` comma-separated domains to exclude\n - `--include_text \"text phrase\"` text that must appear (up to 5 words)\n - `--exclude_text \"unwanted phrase\"` text that must NOT appear (up to 5 words)\n\n4. Date Filters (search operation)\n - `--start_crawl_date 2024-01-01T00:00:00.000Z` results crawled after this date\n - `--end_crawl_date 2024-12-31T00:00:00.000Z` results crawled before this date\n - `--start_published_date 2024-01-01T00:00:00.000Z` content published after\n - `--end_published_date 2024-12-31T00:00:00.000Z` content published before\n\n5. Content Options\n - `--return_text` fetch page text content (default: true)\n - `--text_max_chars [number]` limit text length (empty = unlimited)\n - `--include_html_tags` preserve HTML structure\n - `--return_highlights` get AI-selected key snippets\n - `--highlights_sentences [1-10]` sentences per highlight (default: 3)\n - `--highlights_per_url [1-10]` highlights per result (default: 3)\n - `--highlights_query \"guide text\"` guide highlight selection\n - `--return_summary` get AI-generated summaries\n - `--summary_query \"focus topic\"` guide summary generation\n\n6. Code Search Controls (only for code operation)\n - `--code_tokens [dynamic|5000|10000|20000]` response length (default: dynamic)\n\n7. 
Advanced Options\n - `--livecrawl [fallback|never|always|preferred]` when to fetch fresh content (default: fallback)\n - `--subpages [0-10]` number of linked subpages to crawl (default: 0)\n - `--subpage_target \"keyword\"` find specific subpages matching keyword", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764132329592 } }, { "id": "zai-org/glm-4.6v-flash", "name": "zai-org/glm-4.6v-flash", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765089004 } }, { "id": "essentialai/rnj-1-instruct", "name": "essentialai/rnj-1-instruct", "owned_by": "EssentialAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764817632 } }, { "id": "servicenow-ai/apriel-1.6-15b-thinker", "name": "servicenow-ai/apriel-1.6-15b-thinker", "owned_by": "ServiceNow-AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764372752 } }, { "id": "qwen/qwen3-4b-instruct-2507", "name": "qwen/qwen3-4b-instruct-2507", "owned_by": "Qwen", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754391483 } }, { "id": "zai-org/glm-4.6v-fp8", "name": "zai-org/glm-4.6v-fp8", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765092078 } }, { "id": "huggingfacetb/smollm3-3b", "name": "huggingfacetb/smollm3-3b", "owned_by": "HuggingFaceTB", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751969505 } }, { "id": "meta-llama/meta-llama-3-8b-instruct", "name": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "meta-llama", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1713346512 } }, { "id": "qwen/qwen3-4b-thinking-2507", "name": "qwen/qwen3-4b-thinking-2507", "owned_by": "Qwen", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754391747 } }, { "id": "sao10k/l3-8b-stheno-v3.2", "name": "sao10k/l3-8b-stheno-v3.2", "owned_by": "Sao10K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1717583457 } }, { "id": "swiss-ai/apertus-8b-instruct-2509", "name": "swiss-ai/apertus-8b-instruct-2509", "owned_by": "swiss-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755077423 } }, { "id": "zai-org/glm-4.6-fp8", "name": "zai-org/glm-4.6-fp8", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759132340 } }, { "id": "nvidia/nvidia-nemotron-nano-12b-v2", "name": "nvidia/nvidia-nemotron-nano-12b-v2", "owned_by": "nvidia", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755739910 } 
}, { "id": "coherelabs/command-a-reasoning-08-2025", "name": "coherelabs/command-a-reasoning-08-2025", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755012430 } }, { "id": "zai-org/glm-4.1v-9b-thinking", "name": "zai-org/glm-4.1v-9b-thinking", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751120650 } }, { "id": "deepseek-ai/deepseek-r1-0528-qwen3-8b", "name": "deepseek-ai/deepseek-r1-0528-qwen3-8b", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748516867 } }, { "id": "dicta-il/dictalm-3.0-24b-thinking", "name": "dicta-il/dictalm-3.0-24b-thinking", "owned_by": "dicta-il", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764618951 } }, { "id": "coherelabs/command-a-translate-08-2025", "name": "coherelabs/command-a-translate-08-2025", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756300537 } }, { "id": "qwen/qwen2.5-coder-3b-instruct", "name": "qwen/qwen2.5-coder-3b-instruct", "owned_by": "Qwen", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1730879378 } }, { "id": "deepseek-ai/deepseek-v3", "name": "deepseek-ai/deepseek-v3", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1735131143 } }, { "id": "baidu/ernie-4.5-0.3b-pt", "name": "baidu/ernie-4.5-0.3b-pt", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751091102 } }, { "id": "katanemo/arch-router-1.5b", "name": "katanemo/arch-router-1.5b", "owned_by": "katanemo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748628983 } }, { "id": "baidu/ernie-4.5-21b-a3b-pt", "name": "baidu/ernie-4.5-21b-a3b-pt", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751091210 } }, { "id": "meta-llama/meta-llama-3-70b-instruct", "name": "meta-llama/meta-llama-3-70b-instruct", "owned_by": "meta-llama", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1713346494 } }, { "id": "aisingapore/gemma-sea-lion-v4-27b-it", "name": "aisingapore/gemma-sea-lion-v4-27b-it", "owned_by": "aisingapore", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754898060 } }, { "id": "swiss-ai/apertus-70b-instruct-2509", "name": "swiss-ai/apertus-70b-instruct-2509", "owned_by": "swiss-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756742628 } }, { "id": "coherelabs/aya-expanse-32b", "name": "coherelabs/aya-expanse-32b", 
"owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729665764 } }, { "id": "coherelabs/command-a-vision-07-2025", "name": "coherelabs/command-a-vision-07-2025", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753689307 } }, { "id": "coherelabs/c4ai-command-r7b-arabic-02-2025", "name": "coherelabs/c4ai-command-r7b-arabic-02-2025", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740643869 } }, { "id": "tokyotech-llm/llama-3.3-swallow-70b-instruct-v0.4", "name": "tokyotech-llm/llama-3.3-swallow-70b-instruct-v0.4", "owned_by": "tokyotech-llm", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740976430 } }, { "id": "coherelabs/c4ai-command-a-03-2025", "name": "coherelabs/c4ai-command-a-03-2025", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1741684205 } }, { "id": "sao10k/l3-8b-lunaris-v1", "name": "sao10k/l3-8b-lunaris-v1", "owned_by": "Sao10K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1719362412 } }, { "id": "deepcogito/cogito-671b-v2.1-fp8", "name": "deepcogito/cogito-671b-v2.1-fp8", "owned_by": "deepcogito", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761369429 } }, { "id": "deepseek-ai/deepseek-v3-0324", "name": "deepseek-ai/deepseek-v3-0324", "owned_by": "deepseek-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742808502 } }, { "id": "qwen/qwen2.5-coder-7b", "name": "qwen/qwen2.5-coder-7b", "owned_by": "Qwen", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1726487844 } }, { "id": "baidu/ernie-4.5-vl-424b-a47b-base-pt", "name": "baidu/ernie-4.5-vl-424b-a47b-base-pt", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751089776 } }, { "id": "zai-org/glm-4.5v-fp8", "name": "zai-org/glm-4.5v-fp8", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754833284 } }, { "id": "coherelabs/c4ai-command-r-08-2024", "name": "coherelabs/c4ai-command-r-08-2024", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1724053211 } }, { "id": "primeintellect/intellect-3-fp8", "name": "primeintellect/intellect-3-fp8", "owned_by": "PrimeIntellect", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764177844 } }, { "id": "baidu/ernie-4.5-vl-28b-a3b-pt", "name": "baidu/ernie-4.5-vl-28b-a3b-pt", "owned_by": "baidu", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751089833 } }, { "id": "deepcogito/cogito-671b-v2.1", "name": "deepcogito/cogito-671b-v2.1", "owned_by": "deepcogito", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761344296 } }, { "id": "coherelabs/c4ai-command-r7b-12-2024", "name": "coherelabs/c4ai-command-r7b-12-2024", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1733932505 } }, { "id": "alpindale/wizardlm-2-8x22b", "name": "alpindale/wizardlm-2-8x22b", "owned_by": "alpindale", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1713235019 } }, { "id": "aisingapore/qwen-sea-lion-v4-32b-it", "name": "aisingapore/qwen-sea-lion-v4-32b-it", "owned_by": "aisingapore", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760597914 } }, { "id": "coherelabs/aya-vision-32b", "name": "coherelabs/aya-vision-32b", "owned_by": "CohereLabs", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740951400 } }, { "id": "marin-community/marin-8b-instruct", "name": "marin-community/marin-8b-instruct", "owned_by": "marin-community", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747241066 } }, { "id": "sao10k/l3-70b-euryale-v2.1", "name": "sao10k/l3-70b-euryale-v2.1", "owned_by": "Sao10K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1718134069 } }, { "id": "zai-org/glm-4.5-air-fp8", "name": "zai-org/glm-4.5-air-fp8", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752982200 } }, { "id": "zai-org/glm-4-32b-0414", "name": "zai-org/glm-4-32b-0414", "owned_by": "zai-org", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1744030421 } }, { "id": "qwen/qwen3-coder-480b-a35b-instruct-fp8", "name": "qwen/qwen3-coder-480b-a35b-instruct-fp8", "owned_by": "Qwen", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753197829 } }, { "id": "baidu/ernie-4.5-300b-a47b-base-pt", "name": "baidu/ernie-4.5-300b-a47b-base-pt", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751089026 } }, { "id": "baichuan-inc/baichuan-m2-32b", "name": "baichuan-inc/baichuan-m2-32b", "owned_by": "baichuan-inc", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754808013 } }, { "id": "agent/deepseek-v3.1-terminus", "name": "agent/deepseek-v3.1-terminus", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"agent/deepseek-v3.1-terminus(free)", "name": "agent/deepseek-v3.1-terminus(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/deepseek-v3.2", "name": "agent/deepseek-v3.2", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/deepseek-v3.2(free)", "name": "agent/deepseek-v3.2(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/glm-4.6", "name": "agent/glm-4.6", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/glm-4.6(free)", "name": "agent/glm-4.6(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/glm-4.6v", "name": "agent/glm-4.6v", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/kimi-k2-0905", "name": "agent/kimi-k2-0905", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/kimi-k2-0905(free)", "name": "agent/kimi-k2-0905(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/kimi-k2-thinking(free)", "name": "agent/kimi-k2-thinking(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/minimax-m2", "name": "agent/minimax-m2", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/minimax-m2(free)", "name": "agent/minimax-m2(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/qwen3-coder-30b-a3b-instruct", "name": "agent/qwen3-coder-30b-a3b-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/qwen3-coder-30b-a3b-instruct(free)", "name": "agent/qwen3-coder-30b-a3b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/qwen3-coder-480b-a35b-instruct", "name": "agent/qwen3-coder-480b-a35b-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"agent/qwen3-coder-480b-a35b-instruct(free)", "name": "agent/qwen3-coder-480b-a35b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/qwen3-next-80b-a3b-instruct", "name": "agent/qwen3-next-80b-a3b-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "agent/qwen3-next-80b-a3b-thinking", "name": "agent/qwen3-next-80b-a3b-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "baai/bge-m3(free)", "name": "baai/bge-m3(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "baai/bge-reranker-v2-m3(free)", "name": "baai/bge-reranker-v2-m3(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "baal/bge-reranker-v2-m3", "name": "baal/bge-reranker-v2-m3", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "bytedance/seed-oss-36b-instruct(free)", "name": "bytedance/seed-oss-36b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek/deepseek-ocr(free)", "name": "deepseek/deepseek-ocr(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek/deepseek-v3.1-fast", "name": "deepseek/deepseek-v3.1-fast", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek/deepseek-v3.1-terminus(free)", "name": "deepseek/deepseek-v3.1-terminus(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek/deepseek-v3.2-fast", "name": "deepseek/deepseek-v3.2-fast", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek/deepseek-v3.2(free)", "name": "deepseek/deepseek-v3.2(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "inclusionai/ling-1t(free)", "name": "inclusionai/ling-1t(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kwai-kolors/kolors(free)", "name": "kwai-kolors/kolors(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax/minimax-m2(free)", "name": "minimax/minimax-m2(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "moonshotai/kimi-k2-0905(free)", "name": "moonshotai/kimi-k2-0905(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "moonshotai/kimi-k2-thinking-turbo", "name": "moonshotai/kimi-k2-thinking-turbo", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "moonshotai/kimi-k2-thinking(free)", "name": "moonshotai/kimi-k2-thinking(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "openai/gpt-image-1", "name": "openai/gpt-image-1", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "openai/o1-mini", "name": "openai/o1-mini", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen-image-edit-2509(free)", "name": "qwen/qwen-image-edit-2509(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen-image-edit(free)", "name": "qwen/qwen-image-edit(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen-image(free)", "name": "qwen/qwen-image(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507(free)", "name": "qwen/qwen3-235b-a22b-instruct-2507(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-235b-a22b-thinking-2507(free)", "name": "qwen/qwen3-235b-a22b-thinking-2507(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-30b-a3b-instruct-2507(free)", "name": "qwen/qwen3-30b-a3b-instruct-2507(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-30b-a3b-thinking-2507(free)", "name": "qwen/qwen3-30b-a3b-thinking-2507(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-8b(free)", "name": "qwen/qwen3-8b(free)", 
"owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-coder-30b-a3b-instruct(free)", "name": "qwen/qwen3-coder-30b-a3b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-coder-480b-a35b-instruct(free)", "name": "qwen/qwen3-coder-480b-a35b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-embedding-0.6b(free)", "name": "qwen/qwen3-embedding-0.6b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-embedding-4b", "name": "qwen/qwen3-embedding-4b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-embedding-4b(free)", "name": "qwen/qwen3-embedding-4b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-embedding-8b", "name": "qwen/qwen3-embedding-8b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-embedding-8b(free)", "name": "qwen/qwen3-embedding-8b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-omni-30b-a3b-instruct(free)", "name": "qwen/qwen3-omni-30b-a3b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-omni-30b-a3b-thinking(free)", "name": "qwen/qwen3-omni-30b-a3b-thinking(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-reranker-0.6b(free)", "name": "qwen/qwen3-reranker-0.6b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-reranker-4b", "name": "qwen/qwen3-reranker-4b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-reranker-4b(free)", "name": "qwen/qwen3-reranker-4b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-reranker-8b", "name": "qwen/qwen3-reranker-8b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", 
"tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-reranker-8b(free)", "name": "qwen/qwen3-reranker-8b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-vl-235b-a22b-instruct(free)", "name": "qwen/qwen3-vl-235b-a22b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-vl-235b-a22b-thinking(free)", "name": "qwen/qwen3-vl-235b-a22b-thinking(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-vl-30b-a3b-instruct(free)", "name": "qwen/qwen3-vl-30b-a3b-instruct(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen3-vl-30b-a3b-thinking(free)", "name": "qwen/qwen3-vl-30b-a3b-thinking(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "tencent/hunyuan-mt-7b(free)", "name": "tencent/hunyuan-mt-7b(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "x-ai/grok-2-image", "name": "x-ai/grok-2-image", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "x-ai/grok-4-1-fast-non-reasoning", "name": "x-ai/grok-4-1-fast-non-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "x-ai/grok-4-1-fast-reasoning", "name": "x-ai/grok-4-1-fast-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "x-ai/grok-4-fast-non-reasoning", "name": "x-ai/grok-4-fast-non-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "x-ai/grok-4-fast-reasoning", "name": "x-ai/grok-4-fast-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "z-ai/glm-4.5-flash", "name": "z-ai/glm-4.5-flash", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "z-ai/glm-4.5(free)", "name": "z-ai/glm-4.5(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "z-ai/glm-4.5v(free)", "name": "z-ai/glm-4.5v(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" 
], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "z-ai/glm-4.6(free)", "name": "z-ai/glm-4.6(free)", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "z-ai/glm-4.6v", "name": "z-ai/glm-4.6v", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "pro/deepseek-ai/deepseek-v3.2", "name": "pro/deepseek-ai/deepseek-v3.2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/deepseek-ai/deepseek-v3.1-terminus", "name": "pro/deepseek-ai/deepseek-v3.1-terminus", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/deepseek-ai/deepseek-r1", "name": "pro/deepseek-ai/deepseek-r1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/deepseek-ai/deepseek-v3", "name": "pro/deepseek-ai/deepseek-v3", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/moonshotai/kimi-k2-thinking", "name": "pro/moonshotai/kimi-k2-thinking", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "kwaipilot/kat-dev", "name": "kwaipilot/kat-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-vl-32b-instruct", "name": "qwen/qwen3-vl-32b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-vl-32b-thinking", "name": "qwen/qwen3-vl-32b-thinking", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-omni-30b-a3b-instruct", "name": "qwen/qwen3-omni-30b-a3b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-omni-30b-a3b-thinking", "name": "qwen/qwen3-omni-30b-a3b-thinking", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-omni-30b-a3b-captioner", "name": "qwen/qwen3-omni-30b-a3b-captioner", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "deepseek-ai/deepseek-ocr", "name": "deepseek-ai/deepseek-ocr", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], 
"created": 0 } }, { "id": "pro/moonshotai/kimi-k2-instruct-0905", "name": "pro/moonshotai/kimi-k2-instruct-0905", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen-image-edit-2509", "name": "qwen/qwen-image-edit-2509", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen-image", "name": "qwen/qwen-image", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "wan-ai/wan2.2-i2v-a14b", "name": "wan-ai/wan2.2-i2v-a14b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "wan-ai/wan2.2-t2v-a14b", "name": "wan-ai/wan2.2-t2v-a14b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "teleai/telespeechasr", "name": "teleai/telespeechasr", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "tongyi-zhiwen/qwenlong-l1-32b", "name": "tongyi-zhiwen/qwenlong-l1-32b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-reranker-0.6b", "name": "qwen/qwen3-reranker-0.6b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-embedding-0.6b", "name": "qwen/qwen3-embedding-0.6b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "ascend-tribe/pangu-pro-moe", "name": "ascend-tribe/pangu-pro-moe", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/qwen/qwen2.5-vl-7b-instruct", "name": "pro/qwen/qwen2.5-vl-7b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "fnlp/moss-ttsd-v0.5", "name": "fnlp/moss-ttsd-v0.5", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "funaudiollm/cosyvoice2-0.5b", "name": "funaudiollm/cosyvoice2-0.5b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "funaudiollm/sensevoicesmall", "name": "funaudiollm/sensevoicesmall", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "indexteam/indextts-2", "name": "indexteam/indextts-2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "netease-youdao/bce-embedding-base_v1", "name": "netease-youdao/bce-embedding-base_v1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "netease-youdao/bce-reranker-base_v1", "name": "netease-youdao/bce-reranker-base_v1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "kwai-kolors/kolors", "name": "kwai-kolors/kolors", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen2-vl-72b-instruct", "name": "qwen/qwen2-vl-72b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "internlm/internlm2_5-7b-chat", "name": "internlm/internlm2_5-7b-chat", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "thudm/glm-4-9b-chat", "name": "thudm/glm-4-9b-chat", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "lora/qwen/qwen2.5-32b-instruct", "name": "lora/qwen/qwen2.5-32b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "lora/qwen/qwen2.5-14b-instruct", "name": "lora/qwen/qwen2.5-14b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/deepseek-ai/deepseek-r1-distill-qwen-7b", "name": "pro/deepseek-ai/deepseek-r1-distill-qwen-7b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/qwen/qwen2.5-coder-7b-instruct", "name": "pro/qwen/qwen2.5-coder-7b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/baai/bge-m3", "name": "pro/baai/bge-m3", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/qwen/qwen2.5-7b-instruct", "name": "pro/qwen/qwen2.5-7b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/baai/bge-reranker-v2-m3", "name": "pro/baai/bge-reranker-v2-m3", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "lora/qwen/qwen2.5-72b-instruct", "name": "lora/qwen/qwen2.5-72b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/qwen/qwen2-7b-instruct", "name": "pro/qwen/qwen2-7b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "lora/qwen/qwen2.5-7b-instruct", "name": "lora/qwen/qwen2.5-7b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/thudm/glm-4-9b-chat", "name": "pro/thudm/glm-4-9b-chat", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "thudm/glm-z1-rumination-32b-0414", "name": "thudm/glm-z1-rumination-32b-0414", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "black-forest-labs/flux.1-schnell", "name": "black-forest-labs/flux.1-schnell", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "black-forest-labs/flux.1-dev", "name": "black-forest-labs/flux.1-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "pro/black-forest-labs/flux.1-schnell", "name": "pro/black-forest-labs/flux.1-schnell", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "fishaudio/fish-speech-1.4", "name": "fishaudio/fish-speech-1.4", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "rvc-boss/gpt-sovits", "name": "rvc-boss/gpt-sovits", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "fishaudio/fish-speech-1.5", "name": "fishaudio/fish-speech-1.5", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "black-forest-labs/flux.1-pro", "name": "black-forest-labs/flux.1-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "seedllm/seed-rice-7b", "name": "seedllm/seed-rice-7b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "abab5.5-chat", "name": "abab5.5-chat", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "abab5.5s-chat", "name": "abab5.5s-chat", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "abab6.5g-chat", "name": 
"abab6.5g-chat", "owned_by": "abab6.5g-chat", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "abab6.5s-chat", "name": "abab6.5s-chat", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "abab6.5t-chat", "name": "abab6.5t-chat", "owned_by": "abab6.5t-chat", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ascend/deepseek-r1", "name": "ascend/deepseek-r1", "owned_by": "Ascend/DeepSeek-R1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "azure/deepseek-r1", "name": "azure/deepseek-r1", "owned_by": "Azure/DeepSeek-R1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "bce-reranker-base_v1", "name": "bce-reranker-base_v1", "owned_by": "bce-reranker-base_v1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "bge-reranker-v2-m3", "name": "bge-reranker-v2-m3", "owned_by": "bge-reranker-v2-m3", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "charglm-3", "name": "charglm-3", "owned_by": "charglm-3", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "chatglm-pro", "name": "chatglm-pro", "owned_by": "chatglm-pro", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "chirp-v3-0", "name": "chirp-v3-0", "owned_by": "chirp-v3-0", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "chirp-v3-5", "name": "chirp-v3-5", "owned_by": "chirp-v3-5", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-5-haiku-20241022", "name": "claude-3-5-haiku-20241022", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-3-5-haiku-20241022-cursor", "name": "claude-3-5-haiku-20241022-cursor", "owned_by": "claude-3-5-haiku-20241022-cursor", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-5-haiku-latest", "name": "claude-3-5-haiku-latest", "owned_by": "anthropic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-3-5-sonnet-20241022", "name": "claude-3-5-sonnet-20241022", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, 
"metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-3-5-sonnet-all", "name": "claude-3-5-sonnet-all", "owned_by": "claude-3-5-sonnet-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-5-sonnet-latest", "name": "claude-3-5-sonnet-latest", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-3-7-sonnet-20250219", "name": "claude-3-7-sonnet-20250219", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-3-7-sonnet-20250219-all", "name": "claude-3-7-sonnet-20250219-all", "owned_by": "claude-3-7-sonnet-20250219-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-7-sonnet-20250219-thinking", "name": "claude-3-7-sonnet-20250219-thinking", "owned_by": "claude-3-7-sonnet-20250219-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-7-sonnet-latest", "name": "claude-3-7-sonnet-latest", "owned_by": "claude-3-7-sonnet-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-7-sonnet-thinking", "name": "claude-3-7-sonnet-thinking", "owned_by": "claude-3-7-sonnet-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-7-sonnet-thinking-all", "name": "claude-3-7-sonnet-thinking-all", "owned_by": "claude-3-7-sonnet-thinking-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-3-haiku-20240307-cursor", "name": "claude-3-haiku-20240307-cursor", "owned_by": "claude-3-haiku-20240307-cursor", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-opus-4-1-20250805", "name": "claude-opus-4-1-20250805", "owned_by": "claude-opus-4-1-20250805", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-opus-4-1-20250805-thinking", "name": "claude-opus-4-1-20250805-thinking", "owned_by": "claude-opus-4-1-20250805-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-opus-4-20250514", "name": "claude-opus-4-20250514", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-opus-4-20250514-thinking", "name": "claude-opus-4-20250514-thinking", "owned_by": "claude-opus-4-20250514-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, 
"metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-sonnet-4-20250514", "name": "claude-sonnet-4-20250514", "owned_by": "aws", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-sonnet-4-20250514-thinking", "name": "claude-sonnet-4-20250514-thinking", "owned_by": "claude-sonnet-4-20250514-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-sonnet-4-5-20250929", "name": "claude-sonnet-4-5-20250929", "owned_by": "claude-sonnet-4-5-20250929", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "claude-sonnet-4-5-20250929-thinking", "name": "claude-sonnet-4-5-20250929-thinking", "owned_by": "claude-sonnet-4-5-20250929-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "codegeex-4", "name": "codegeex-4", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "concise", "name": "concise", "owned_by": "concise", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "concise-scholar", "name": "concise-scholar", "owned_by": "concise-scholar", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-ai/deepseek-prover-v2", "name": "deepseek-ai/deepseek-prover-v2", "owned_by": "deepseek-ai/DeepSeek-Prover-V2", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-ai/deepseek-prover-v2-search", "name": "deepseek-ai/deepseek-prover-v2-search", "owned_by": "deepseek-ai/DeepSeek-Prover-V2-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-ai/deepseek-r1-search", "name": "deepseek-ai/deepseek-r1-search", "owned_by": "deepseek-ai/DeepSeek-R1-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-reasoner", "name": "deepseek-reasoner", "owned_by": "deepseek-reasoner", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-reasoner-search", "name": "deepseek-reasoner-search", "owned_by": "deepseek-reasoner-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "deepseek-search", "name": "deepseek-search", "owned_by": "deepseek-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "detail", "name": "detail", "owned_by": 
"detail", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "detail-scholar", "name": "detail-scholar", "owned_by": "detail-scholar", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "doubao-embedding", "name": "doubao-embedding", "owned_by": "doubao", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-tts", "name": "doubao-tts", "owned_by": "Doubao-tts", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "embedding-001", "name": "embedding-001", "owned_by": "embedding-001", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "embedding-gecko-001", "name": "embedding-gecko-001", "owned_by": "embedding-gecko-001", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "emohaa", "name": "emohaa", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-3.5-128k", "name": "ernie-3.5-128k", "owned_by": "ERNIE-3.5-128K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-3.5-8k", "name": "ernie-3.5-8k", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-3.5-8k-preview", "name": "ernie-3.5-8k-preview", "owned_by": "ERNIE-3.5-8K-Preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-4.0-8k", "name": "ernie-4.0-8k", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-4.0-8k-latest", "name": "ernie-4.0-8k-latest", "owned_by": "ERNIE-4.0-8K-Latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-4.0-8k-preview", "name": "ernie-4.0-8k-preview", "owned_by": "ERNIE-4.0-8K-Preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-4.0-turbo-128k", "name": "ernie-4.0-turbo-128k", "owned_by": "ERNIE-4.0-Turbo-128K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-4.0-turbo-8k", "name": "ernie-4.0-turbo-8k", "owned_by": "ERNIE-4.0-Turbo-8K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": 
"ernie-4.0-turbo-8k-latest", "name": "ernie-4.0-turbo-8k-latest", "owned_by": "ERNIE-4.0-Turbo-8K-Latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-4.0-turbo-8k-preview", "name": "ernie-4.0-turbo-8k-preview", "owned_by": "ERNIE-4.0-Turbo-8K-Preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-character-8k", "name": "ernie-character-8k", "owned_by": "ERNIE-Character-8K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-lite-8k", "name": "ernie-lite-8k", "owned_by": "ERNIE-Lite-8K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-lite-pro-128k", "name": "ernie-lite-pro-128k", "owned_by": "ERNIE-Lite-Pro-128K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-novel-8k", "name": "ernie-novel-8k", "owned_by": "ERNIE-Novel-8K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-speed-128k", "name": "ernie-speed-128k", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-speed-8k", "name": "ernie-speed-8k", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-speed-pro-128k", "name": "ernie-speed-pro-128k", "owned_by": "ERNIE-Speed-Pro-128K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ernie-tiny-8k", "name": "ernie-tiny-8k", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "flux", "name": "flux", "owned_by": "flux", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-1.5-flash", "name": "gemini-1.5-flash", "owned_by": "google gemini", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-1.5-flash-exp-0827", "name": "gemini-1.5-flash-exp-0827", "owned_by": "gemini-1.5-flash-exp-0827", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-1.5-flash-latest", "name": "gemini-1.5-flash-latest", "owned_by": "gemini-1.5-flash-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-1.5-pro", "name": "gemini-1.5-pro", "owned_by": "google gemini", "input_modalities": [ "TEXT" 
], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-1.5-pro-001", "name": "gemini-1.5-pro-001", "owned_by": "vertexai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-1.5-pro-002", "name": "gemini-1.5-pro-002", "owned_by": "vertexai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-1.5-pro-exp-0827", "name": "gemini-1.5-pro-exp-0827", "owned_by": "gemini-1.5-pro-exp-0827", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-1.5-pro-latest", "name": "gemini-1.5-pro-latest", "owned_by": "gemini-1.5-pro-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.0-flash-lite-preview", "name": "gemini-2.0-flash-lite-preview", "owned_by": "gemini-2.0-flash-lite-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.0-pro-exp", "name": "gemini-2.0-pro-exp", "owned_by": "gemini-2.0-pro-exp", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-computer-use-preview-10-2025", "name": "gemini-2.5-computer-use-preview-10-2025", "owned_by": "gemini-2.5-computer-use-preview-10-2025", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-ci", "name": "gemini-2.5-flash-ci", "owned_by": "gemini-2.5-flash-ci", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-lite-nothinking", "name": "gemini-2.5-flash-lite-nothinking", "owned_by": "gemini-2.5-flash-lite-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-lite-preview-06-17", "name": "gemini-2.5-flash-lite-preview-06-17", "owned_by": "gemini-2.5-flash-lite-preview-06-17", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-lite-preview-06-17-thinking", "name": "gemini-2.5-flash-lite-preview-06-17-thinking", "owned_by": "gemini-2.5-flash-lite-preview-06-17-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-lite-thinking", "name": "gemini-2.5-flash-lite-thinking", "owned_by": "gemini-2.5-flash-lite-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-nothinking", "name": "gemini-2.5-flash-nothinking", "owned_by": 
"gemini-2.5-flash-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-04-17", "name": "gemini-2.5-flash-preview-04-17", "owned_by": "gemini-2.5-flash-preview-04-17", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-04-17-nothinking", "name": "gemini-2.5-flash-preview-04-17-nothinking", "owned_by": "gemini-2.5-flash-preview-04-17-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-04-17-thinking", "name": "gemini-2.5-flash-preview-04-17-thinking", "owned_by": "gemini-2.5-flash-preview-04-17-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-05-20", "name": "gemini-2.5-flash-preview-05-20", "owned_by": "gemini-2.5-flash-preview-05-20", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-05-20-nothinking", "name": "gemini-2.5-flash-preview-05-20-nothinking", "owned_by": "gemini-2.5-flash-preview-05-20-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-05-20-thinking", "name": "gemini-2.5-flash-preview-05-20-thinking", "owned_by": "gemini-2.5-flash-preview-05-20-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-09-2025-nothinking", "name": "gemini-2.5-flash-preview-09-2025-nothinking", "owned_by": "gemini-2.5-flash-preview-09-2025-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-09-2025-thinking", "name": "gemini-2.5-flash-preview-09-2025-thinking", "owned_by": "gemini-2.5-flash-preview-09-2025-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-native-audio-dialog", "name": "gemini-2.5-flash-preview-native-audio-dialog", "owned_by": "gemini-2.5-flash-preview-native-audio-dialog", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-preview-tts", "name": "gemini-2.5-flash-preview-tts", "owned_by": "gemini-2.5-flash-preview-tts", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-flash-thinking", "name": "gemini-2.5-flash-thinking", "owned_by": "gemini-2.5-flash-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": 
"gemini-2.5-pro-ci", "name": "gemini-2.5-pro-ci", "owned_by": "gemini-2.5-pro-ci", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-deepsearch", "name": "gemini-2.5-pro-deepsearch", "owned_by": "gemini-2.5-pro-deepsearch", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-nothinking", "name": "gemini-2.5-pro-nothinking", "owned_by": "gemini-2.5-pro-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-03-25-thinking", "name": "gemini-2.5-pro-preview-03-25-thinking", "owned_by": "gemini-2.5-pro-preview-03-25-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-05-06-thinking", "name": "gemini-2.5-pro-preview-05-06-thinking", "owned_by": "gemini-2.5-pro-preview-05-06-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-06-05-nothinking", "name": "gemini-2.5-pro-preview-06-05-nothinking", "owned_by": "gemini-2.5-pro-preview-06-05-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-06-05-thinking", "name": "gemini-2.5-pro-preview-06-05-thinking", "owned_by": "gemini-2.5-pro-preview-06-05-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-06-05-thinking-512", "name": "gemini-2.5-pro-preview-06-05-thinking-512", "owned_by": "gemini-2.5-pro-preview-06-05-thinking-512", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-preview-tts", "name": "gemini-2.5-pro-preview-tts", "owned_by": "gemini-2.5-pro-preview-tts", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-thinking", "name": "gemini-2.5-pro-thinking", "owned_by": "gemini-2.5-pro-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-thinking-128", "name": "gemini-2.5-pro-thinking-128", "owned_by": "gemini-2.5-pro-thinking-128", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-2.5-pro-thinking-512", "name": "gemini-2.5-pro-thinking-512", "owned_by": "gemini-2.5-pro-thinking-512", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-3-pro-preview-thinking", "name": "gemini-3-pro-preview-thinking", "owned_by": "gemini-3-pro-preview-thinking", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-embedding-exp", "name": "gemini-embedding-exp", "owned_by": "gemini-embedding-exp", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-latest", "name": "gemini-flash-latest", "owned_by": "gemini-flash-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-latest-nothinking", "name": "gemini-flash-latest-nothinking", "owned_by": "gemini-flash-latest-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-latest-thinking", "name": "gemini-flash-latest-thinking", "owned_by": "gemini-flash-latest-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-lite-latest", "name": "gemini-flash-lite-latest", "owned_by": "gemini-flash-lite-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-lite-latest-nothinking", "name": "gemini-flash-lite-latest-nothinking", "owned_by": "gemini-flash-lite-latest-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-flash-lite-latest-thinking", "name": "gemini-flash-lite-latest-thinking", "owned_by": "gemini-flash-lite-latest-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-pro-latest", "name": "gemini-pro-latest", "owned_by": "gemini-pro-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-pro-latest-nothinking", "name": "gemini-pro-latest-nothinking", "owned_by": "gemini-pro-latest-nothinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gemini-pro-latest-thinking", "name": "gemini-pro-latest-thinking", "owned_by": "gemini-pro-latest-thinking", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "glm-4-air", "name": "glm-4-air", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4-airx", "name": "glm-4-airx", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4-flashx", "name": "glm-4-flashx", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 
1626777600 } }, { "id": "glm-4-long", "name": "glm-4-long", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4v-flash", "name": "glm-4v-flash", "owned_by": "zhipu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-3.5o", "name": "gpt-3.5o", "owned_by": "gpt-3.5o", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-all", "name": "gpt-4-all", "owned_by": "gpt-4-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3-haiku-20240307", "name": "gpt-4-claude3-haiku-20240307", "owned_by": "gpt-4-claude3-haiku-20240307", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3-opus-20240229", "name": "gpt-4-claude3-opus-20240229", "owned_by": "gpt-4-claude3-opus-20240229", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3-sonnet-20240229", "name": "gpt-4-claude3-sonnet-20240229", "owned_by": "gpt-4-claude3-sonnet-20240229", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3.5-haiku-20241022", "name": "gpt-4-claude3.5-haiku-20241022", "owned_by": "gpt-4-claude3.5-haiku-20241022", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3.5-sonnet-20240620", "name": "gpt-4-claude3.5-sonnet-20240620", "owned_by": "gpt-4-claude3.5-sonnet-20240620", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3.5-sonnet-20241022", "name": "gpt-4-claude3.5-sonnet-20241022", "owned_by": "gpt-4-claude3.5-sonnet-20241022", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3.5-sonnet-all", "name": "gpt-4-claude3.5-sonnet-all", "owned_by": "gpt-4-claude3.5-sonnet-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-claude3.7-sonnet-20250219", "name": "gpt-4-claude3.7-sonnet-20250219", "owned_by": "gpt-4-claude3.7-sonnet-20250219", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-gizmo-*", "name": "gpt-4-gizmo-*", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4-grok-3-all", "name": "gpt-4-grok-3-all", "owned_by": "gpt-4-grok-3-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4.1-2025-04-14", "name": "gpt-4.1-2025-04-14", "owned_by": "gpt-4.1-2025-04-14", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4.1-mini-2025-04-14", "name": "gpt-4.1-mini-2025-04-14", "owned_by": "gpt-4.1-mini-2025-04-14", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4.1-nano-2025-04-14", "name": "gpt-4.1-nano-2025-04-14", "owned_by": "gpt-4.1-nano-2025-04-14", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4.5-preview", "name": "gpt-4.5-preview", "owned_by": "gpt-4.5-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-all", "name": "gpt-4o-all", "owned_by": "gpt-4o-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-audio-preview-2024-12-17", "name": "gpt-4o-audio-preview-2024-12-17", "owned_by": "gpt-4o-audio-preview-2024-12-17", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-lite", "name": "gpt-4o-lite", "owned_by": "gpt-4o-lite", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-audio-preview-2024-12-17", "name": "gpt-4o-mini-audio-preview-2024-12-17", "owned_by": "gpt-4o-mini-audio-preview-2024-12-17", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-realtime-preview", "name": "gpt-4o-mini-realtime-preview", "owned_by": "gpt-4o-mini-realtime-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-realtime-preview-2024-12-17", "name": "gpt-4o-mini-realtime-preview-2024-12-17", "owned_by": "gpt-4o-mini-realtime-preview-2024-12-17", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-transcribe", "name": "gpt-4o-mini-transcribe", "owned_by": "gpt-4o-mini-transcribe", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-transcribe-2025-03-20", "name": "gpt-4o-mini-transcribe-2025-03-20", "owned_by": "gpt-4o-mini-transcribe-2025-03-20", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-mini-tts-1", "name": "gpt-4o-mini-tts-1", "owned_by": "gpt-4o-mini-tts-1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } 
}, { "id": "gpt-4o-mini-tts-2025-03-20", "name": "gpt-4o-mini-tts-2025-03-20", "owned_by": "gpt-4o-mini-tts-2025-03-20", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-realtime-preview", "name": "gpt-4o-realtime-preview", "owned_by": "gpt-4o-realtime-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-realtime-preview-2024-10-01", "name": "gpt-4o-realtime-preview-2024-10-01", "owned_by": "gpt-4o-realtime-preview-2024-10-01", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-realtime-preview-2025-06-03", "name": "gpt-4o-realtime-preview-2025-06-03", "owned_by": "gpt-4o-realtime-preview-2025-06-03", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-transcribe", "name": "gpt-4o-transcribe", "owned_by": "gpt-4o-transcribe", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-4o-transcribe-2025-03-20", "name": "gpt-4o-transcribe-2025-03-20", "owned_by": "gpt-4o-transcribe-2025-03-20", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-2025-08-07", "name": "gpt-5-2025-08-07", "owned_by": "gpt-5-2025-08-07", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-all", "name": "gpt-5-all", "owned_by": "gpt-5-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-chat-2025-08-07", "name": "gpt-5-chat-2025-08-07", "owned_by": "gpt-5-chat-2025-08-07", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-mini-2025-08-07", "name": "gpt-5-mini-2025-08-07", "owned_by": "gpt-5-mini-2025-08-07", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-nano-2025-08-07", "name": "gpt-5-nano-2025-08-07", "owned_by": "gpt-5-nano-2025-08-07", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5-thinking-all", "name": "gpt-5-thinking-all", "owned_by": "gpt-5-thinking-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-5.1-2025-11-13", "name": "gpt-5.1-2025-11-13", "owned_by": "gpt-5.1-2025-11-13", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "gpt-oss-120b-1", "name": "gpt-oss-120b-1", "owned_by": "gpt-oss-120b-1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" 
], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "grok-3-all", "name": "grok-3-all", "owned_by": "grok-3-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "grok-3-deepersearch", "name": "grok-3-deepersearch", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-deepsearch", "name": "grok-3-deepsearch", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-deepsearch-all", "name": "grok-3-deepsearch-all", "owned_by": "grok-3-deepsearch-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "grok-3-image", "name": "grok-3-image", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-reasoner", "name": "grok-3-reasoner", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-reasoner-all", "name": "grok-3-reasoner-all", "owned_by": "grok-3-reasoner-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "grok-3-reasoning", "name": "grok-3-reasoning", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-search", "name": "grok-3-search", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4-0709", "name": "grok-4-0709", "owned_by": "grok-4-0709", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "grok-beta", "name": "grok-beta", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gte-rerank", "name": "gte-rerank", "owned_by": "gte-rerank", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hailuo", "name": "hailuo", "owned_by": "hailuo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hispeed/deepseek-r1", "name": "hispeed/deepseek-r1", "owned_by": "HiSpeed/DeepSeek-R1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-code", "name": "hunyuan-code", "owned_by": "hunyuan-code", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], 
"created": 1677649963 } }, { "id": "hunyuan-embedding", "name": "hunyuan-embedding", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-functioncall", "name": "hunyuan-functioncall", "owned_by": "hunyuan-functioncall", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-large", "name": "hunyuan-large", "owned_by": "hunyuan-large", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-large-longcontext", "name": "hunyuan-large-longcontext", "owned_by": "hunyuan-large-longcontext", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-lite", "name": "hunyuan-lite", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-pro", "name": "hunyuan-pro", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-role", "name": "hunyuan-role", "owned_by": "hunyuan-role", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-standard", "name": "hunyuan-standard", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-turbo", "name": "hunyuan-turbo", "owned_by": "hunyuan-turbo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-turbo-vision", "name": "hunyuan-turbo-vision", "owned_by": "hunyuan-turbo-vision", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "hunyuan-vision", "name": "hunyuan-vision", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "jimeng-3.0", "name": "jimeng-3.0", "owned_by": "jimeng-3.0", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "kimi-k2-0711-preview", "name": "kimi-k2-0711-preview", "owned_by": "kimi-k2-0711-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "kimi-latest", "name": "kimi-latest", "owned_by": "kimi-latest", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "luma-video", "name": "luma-video", "owned_by": "luma-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { 
"source": "api", "tags": [], "created": 1677649963 } }, { "id": "meta-llama/llama-4", "name": "meta-llama/llama-4", "owned_by": "meta-llama/llama-4", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "meta-llama/meta-llama-3.1-405b-instruct", "name": "meta-llama/meta-llama-3.1-405b-instruct", "owned_by": "meta-llama/Meta-Llama-3.1-405B-Instruct", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "midjourney", "name": "midjourney", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "mj-chat", "name": "mj-chat", "owned_by": "mj-chat", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "model-router", "name": "model-router", "owned_by": "model-router", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-glm-3-turbo", "name": "net-glm-3-turbo", "owned_by": "net-glm-3-turbo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-3.5-turbo", "name": "net-gpt-3.5-turbo", "owned_by": "net-gpt-3.5-turbo", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-3.5-turbo-16k", "name": "net-gpt-3.5-turbo-16k", "owned_by": "net-gpt-3.5-turbo-16k", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-4", "name": "net-gpt-4", "owned_by": "net-gpt-4", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-4-0125-preview", "name": "net-gpt-4-0125-preview", "owned_by": "net-gpt-4-0125-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-4-1106-preview", "name": "net-gpt-4-1106-preview", "owned_by": "net-gpt-4-1106-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "net-gpt-4-turbo-preview", "name": "net-gpt-4-turbo-preview", "owned_by": "net-gpt-4-turbo-preview", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o1-all", "name": "o1-all", "owned_by": "o1-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o1-pro-all", "name": "o1-pro-all", "owned_by": "o1-pro-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o3-2025-04-16", "name": "o3-2025-04-16", "owned_by": 
"o3-2025-04-16", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o3-mini-2025-01-31", "name": "o3-mini-2025-01-31", "owned_by": "o3-mini-2025-01-31", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o3-mini-all", "name": "o3-mini-all", "owned_by": "o3-mini-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o3-mini-high-all", "name": "o3-mini-high-all", "owned_by": "o3-mini-high-all", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o3-pro-2025-06-10", "name": "o3-pro-2025-06-10", "owned_by": "o3-pro-2025-06-10", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "o4-mini-2025-04-16", "name": "o4-mini-2025-04-16", "owned_by": "o4-mini-2025-04-16", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ocoolai-helper", "name": "ocoolai-helper", "owned_by": "ocoolai-helper", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "ocoolai/deepseek-r1", "name": "ocoolai/deepseek-r1", "owned_by": "ocoolAI/DeepSeek-R1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "omni-moderation-2024-09-26", "name": "omni-moderation-2024-09-26", "owned_by": "omni-moderation-2024-09-26", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "pika-text-to-video", "name": "pika-text-to-video", "owned_by": "pika-text-to-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "qwen-14b-chat", "name": "qwen-14b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-72b-chat", "name": "qwen-72b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-7b-chat", "name": "qwen-7b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-plus", "name": "qwen-coder-plus", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-plus-latest", "name": "qwen-coder-plus-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { 
"id": "qwen-coder-turbo", "name": "qwen-coder-turbo", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-turbo-latest", "name": "qwen-coder-turbo-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-plus", "name": "qwen-math-plus", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-plus-latest", "name": "qwen-math-plus-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-turbo", "name": "qwen-math-turbo", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-turbo-latest", "name": "qwen-math-turbo-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-latest", "name": "qwen-max-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-max-latest", "name": "qwen-vl-max-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-ocr", "name": "qwen-vl-ocr", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-ocr-latest", "name": "qwen-vl-ocr-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-plus-latest", "name": "qwen-vl-plus-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen/qwen2.5-vl-72b-instruct-search", "name": "qwen/qwen2.5-vl-72b-instruct-search", "owned_by": "Qwen/Qwen2.5-VL-72B-Instruct-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "qwen/qwen2.5-vl-7b-instruct-search", "name": "qwen/qwen2.5-vl-7b-instruct-search", "owned_by": "Qwen/Qwen2.5-VL-7B-Instruct-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "qwen/qwen3-235b-a22b-search", "name": "qwen/qwen3-235b-a22b-search", "owned_by": "Qwen/Qwen3-235B-A22B-search", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "qwen/qwq-32b-search", "name": "qwen/qwq-32b-search", "owned_by": "Qwen/QwQ-32B-search", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "qwen1.5-110b-chat", "name": "qwen1.5-110b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-14b-chat", "name": "qwen1.5-14b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-32b-chat", "name": "qwen1.5-32b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-72b-chat", "name": "qwen1.5-72b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-7b-chat", "name": "qwen1.5-7b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-57b-a14b-instruct", "name": "qwen2-57b-a14b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-72b-instruct", "name": "qwen2-72b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-7b-instruct", "name": "qwen2-7b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-math-72b-instruct", "name": "qwen2-math-72b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-math-7b-instruct", "name": "qwen2-math-7b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-coder-14b-instruct", "name": "qwen2.5-coder-14b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-coder-32b-instruct", "name": "qwen2.5-coder-32b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "research", "name": "research", "owned_by": "research", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "research-scholar", "name": "research-scholar", "owned_by": "research-scholar", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sambert-v1", "name": "sambert-v1", "owned_by": "sambert-v1", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "search-gpts-chat", "name": "search-gpts-chat", "owned_by": "search-gpts-chat", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-4.0-ultra", "name": "sparkdesk-4.0-ultra", "owned_by": "SparkDesk-4.0-Ultra", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-lite", "name": "sparkdesk-lite", "owned_by": "SparkDesk-Lite", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-max", "name": "sparkdesk-max", "owned_by": "SparkDesk-Max", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-max-32k", "name": "sparkdesk-max-32k", "owned_by": "SparkDesk-Max-32k", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-pro", "name": "sparkdesk-pro", "owned_by": "SparkDesk-Pro", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "sparkdesk-pro-128k", "name": "sparkdesk-pro-128k", "owned_by": "SparkDesk-Pro-128K", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "stable-diffusion", "name": "stable-diffusion", "owned_by": "stable-diffusion", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "step-1-128k", "name": "step-1-128k", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1-256k", "name": "step-1-256k", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1-32k", "name": "step-1-32k", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1-8k", "name": "step-1-8k", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1-flash", "name": "step-1-flash", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1.5v-mini", "name": "step-1.5v-mini", "owned_by": "step-1.5v-mini", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "step-1v-32k", "name": "step-1v-32k", "owned_by": "stepfun", "input_modalities": [ 
"TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1v-8k", "name": "step-1v-8k", "owned_by": "stepfun", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "suno", "name": "suno", "owned_by": "suno", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "suno-v3", "name": "suno-v3", "owned_by": "suno-v3", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "text-embedding-v2", "name": "text-embedding-v2", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "text-embedding-v3", "name": "text-embedding-v3", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "tts-hd-1", "name": "tts-hd-1", "owned_by": "tts-hd-1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "url-analysis", "name": "url-analysis", "owned_by": "url-analysis", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "yi-vision-v2", "name": "yi-vision-v2", "owned_by": "yi-vision-v2", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1677649963 } }, { "id": "doubao-seed-1-6-flash-250828", "name": "doubao-seed-1-6-flash-250828", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax_minimax-hailuo-02", "name": "minimax_minimax-hailuo-02", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1.5-vision-pro-250328", "name": "doubao-1.5-vision-pro-250328", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-haiku-4-5-20251001", "name": "claude-haiku-4-5-20251001", "owned_by": "vertex-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-cls45-0929", "name": "dmxapi-cls45-0929", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_prompt_analyzer", "name": "mj_fast_prompt_analyzer", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_low_variation", "name": "mj_turbo_low_variation", "owned_by": "midjourney", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_background_eraser", "name": "mj_fast_background_eraser", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_low_variation", "name": "mj_relax_low_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-11-25", "name": "qwen-plus-2024-11-25", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "sparkdesk-v4.0", "name": "sparkdesk-v4.0", "owned_by": "xunfei", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seed-code-preview-251028", "name": "doubao-seed-code-preview-251028", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_describe", "name": "mj_fast_describe", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upscale_subtle", "name": "mj_relax_upscale_subtle", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-r1-250120", "name": "deepseek-r1-250120", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-cls4-0514", "name": "dmxapi-cls4-0514", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-image-1-dmx02", "name": "gpt-image-1-dmx02", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_elements_add", "name": "kling_multi_elements_add", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_effects", "name": "kling_effects", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_image", "name": "kling_image", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_imagine", "name": "mj_relax_imagine", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"kling_multi_elements_init", "name": "kling_multi_elements_init", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax_s2v-01", "name": "minimax_s2v-01", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-02-hd", "name": "speech-02-hd", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-2.6-hd", "name": "speech-2.6-hd", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_prompt_analyzer", "name": "mj_relax_prompt_analyzer", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-0.5b-chat", "name": "qwen1.5-0.5b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-turbo-2025-02-11", "name": "qwen-turbo-2025-02-11", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.2-thinking", "name": "deepseek-v3.2-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kimi-k2-0905-preview", "name": "kimi-k2-0905-preview", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kimi-k2-thinking-turbo", "name": "kimi-k2-thinking-turbo", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_pic_reader", "name": "mj_relax_pic_reader", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-image-base64", "name": "gemini-2.5-flash-image-base64", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-codex-low", "name": "gpt-5-codex-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-1.8b-longcontext-chat", "name": "qwen-1.8b-longcontext-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-turbo-2024-09-19", "name": "qwen-math-turbo-2024-09-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-opus-4-20250514-ssvip", "name": "claude-opus-4-20250514-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_pan", "name": "mj_relax_pan", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clo41-20250805", "name": "dmxapi-clo41-20250805", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax-m1-80k", "name": "minimax-m1-80k", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_shorten", "name": "mj_turbo_shorten", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-search", "name": "qwen-max-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-gm3-pro", "name": "dmxapi-gm3-pro", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-huoshan-deepseek-r1-671b-64k", "name": "dmxapi-huoshan-deepseek-r1-671b-64k", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-plus-2024-11-06", "name": "qwen-coder-plus-2024-11-06", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-1.5b-instruct", "name": "qwen2-1.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-ssvip", "name": "gpt-5-nano-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_audio_video_to_audio", "name": "kling_audio_video_to_audio", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax_t2v-01-director", "name": "minimax_t2v-01-director", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_low_variation", "name": "mj_fast_low_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upscale_2x", "name": 
"mj_turbo_upscale_2x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "tts-pro", "name": "tts-pro", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-web", "name": "gpt-5-web", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-audio-chat", "name": "qwen-audio-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_modal", "name": "mj_fast_modal", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qvq-72b-preview", "name": "qvq-72b-preview", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upscale", "name": "mj_fast_upscale", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_custom_zoom", "name": "mj_relax_custom_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upscale_2x", "name": "mj_relax_upscale_2x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax-hailuo-2.3-fast", "name": "minimax-hailuo-2.3-fast", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_reroll", "name": "mj_fast_reroll", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-08-06", "name": "qwen-plus-2024-08-06", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_inpaint", "name": "mj_fast_inpaint", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_high_variation", "name": "mj_turbo_high_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-coder-0.5b-instruct", "name": "qwen2.5-coder-0.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"claude-opus-4-5-20251101", "name": "claude-opus-4-5-20251101", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-huoshan-deepseek-v3", "name": "dmxapi-huoshan-deepseek-v3", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-deepsearch", "name": "gemini-2.5-flash-deepsearch", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_describe", "name": "mj_relax_describe", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-turbo-2024-09-19", "name": "qwen-coder-turbo-2024-09-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-max-2024-08-09", "name": "qwen-vl-max-2024-08-09", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-vl-2b-instruct", "name": "qwen2-vl-2b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-opus-4-20250514-thinking-ssvip", "name": "claude-opus-4-20250514-thinking-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-gem25-pro", "name": "dmxapi-gem25-pro", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_audio_text_to_audio", "name": "kling_audio_text_to_audio", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upscale_creative", "name": "mj_relax_upscale_creative", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_elements_delete", "name": "kling_multi_elements_delete", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-audio-turbo-2024-12-04", "name": "qwen-audio-turbo-2024-12-04", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-uc", "name": "deepseek-v3.1-uc", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_pan", "name": "mj_turbo_pan", "owned_by": 
"midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-r1-32b", "name": "dmxapi-deepseek-r1-32b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedance-1-0-pro-250528", "name": "doubao-seedance-1-0-pro-250528", "owned_by": "doubao-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.0-flash-ssvip", "name": "gemini-2.0-flash-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4.1v-thinking-flash", "name": "glm-4.1v-thinking-flash", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_variation", "name": "mj_turbo_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen1.5-1.8b-chat", "name": "qwen1.5-1.8b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "sora-2-hd-10s-chat", "name": "sora-2-hd-10s-chat", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-mt-7b", "name": "hunyuan-mt-7b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-opus-4-5-20251101-cc", "name": "claude-opus-4-5-20251101-cc", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_variation", "name": "mj_fast_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-plus-2024-08-09", "name": "qwen-vl-plus-2024-08-09", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_virtual_try_on", "name": "kling_virtual_try_on", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-audio-instruct", "name": "qwen2-audio-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-sonnet-4-5-20250929-cc", "name": "claude-sonnet-4-5-20250929-cc", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": 
"api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-ssvip", "name": "gpt-5-mini-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "viduq1", "name": "viduq1", "owned_by": "vidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepclaude-liu", "name": "deepclaude-liu", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_extend", "name": "kling_extend", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "minimax_files_retrieve", "name": "minimax_files_retrieve", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_inpaint", "name": "mj_relax_inpaint", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-audio-turbo-2024-08-07", "name": "qwen-audio-turbo-2024-08-07", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-code-1", "name": "claude-code-1", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-v1", "name": "qwen-vl-v1", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upscale_2x", "name": "mj_fast_upscale_2x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-pro-ssvip", "name": "gpt-5-pro-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_video", "name": "kling_video", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_inpaint", "name": "mj_turbo_inpaint", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen3-coder-480b", "name": "qwen3-coder-480b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-sn", "name": "deepseek-v3.1-sn", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"minimax-hailuo-02", "name": "minimax-hailuo-02", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upload", "name": "mj_fast_upload", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_modal", "name": "mj_relax_modal", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_reroll", "name": "mj_relax_reroll", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qvq-max-2025-03-25", "name": "qvq-max-2025-03-25", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-lite-8k-0922", "name": "ernie-lite-8k-0922", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-image-1-dmx03", "name": "gpt-image-1-dmx03", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-2.5-turbo-preview", "name": "speech-2.5-turbo-preview", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-opus-4-5-20251101-thinking", "name": "claude-opus-4-5-20251101-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_blend", "name": "mj_fast_blend", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "ernie-lite-8k-0308", "name": "ernie-lite-8k-0308", "owned_by": "baidu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_prompt_analyzer_extended", "name": "mj_turbo_prompt_analyzer_extended", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "yi-medium-200k", "name": "yi-medium-200k", "owned_by": "lingyiwanwu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clo4-0514", "name": "dmxapi-clo4-0514", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-2.6-hd-dmx", "name": "speech-2.6-hd-dmx", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { 
"source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-thinking", "name": "deepseek-v3.1-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedream-4-0-250828", "name": "doubao-seedream-4-0-250828", "owned_by": "volcengine", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_high_variation", "name": "mj_fast_high_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upload", "name": "mj_relax_upload", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepgeminipro-liu", "name": "deepgeminipro-liu", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-06-24", "name": "qwen-plus-2024-06-24", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-t1-20250711", "name": "hunyuan-t1-20250711", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_zoom", "name": "mj_fast_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-ocr-2024-10-28", "name": "qwen-vl-ocr-2024-10-28", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-2.5-hd-preview", "name": "speech-2.5-hd-preview", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.2-exp-thinking", "name": "deepseek-v3.2-exp-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clo45-20251101", "name": "dmxapi-clo45-20251101", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepgeminiflash-liu", "name": "deepgeminiflash-liu", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-r1", "name": "dmxapi-deepseek-r1", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "yi-vision", "name": "yi-vision", "owned_by": "lingyiwanwu", "input_modalities": [ "TEXT" 
], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-r1-250528", "name": "deepseek-r1-250528", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-a13b", "name": "hunyuan-a13b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-standard-256k", "name": "hunyuan-standard-256k", "owned_by": "tencent", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-embedding-large-text-250515", "name": "doubao-embedding-large-text-250515", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_zoom", "name": "mj_turbo_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_elements_clear", "name": "kling_multi_elements_clear", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_elements_preview", "name": "kling_multi_elements_preview", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_custom_zoom", "name": "mj_turbo_custom_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-plus-2024-09-19", "name": "qwen-math-plus-2024-09-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4-alltools", "name": "glm-4-alltools", "owned_by": "zhipu_4v", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upscale_creative", "name": "mj_turbo_upscale_creative", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-ssvip", "name": "gemini-2.5-flash-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-haiku-4-5-20251001-cc", "name": "claude-haiku-4-5-20251001-cc", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_shorten", "name": "mj_fast_shorten", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 
1626777600 } }, { "id": "mj_relax_edits", "name": "mj_relax_edits", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwq-plus", "name": "qwq-plus", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-sonnet-4-5-20250929-ssvip", "name": "claude-sonnet-4-5-20250929-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clh45think-20251001", "name": "dmxapi-clh45think-20251001", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_edits", "name": "mj_fast_edits", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_background_eraser", "name": "mj_turbo_background_eraser", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-2.6-turbo", "name": "speech-2.6-turbo", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clh45-20251001", "name": "dmxapi-clh45-20251001", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_video", "name": "mj_relax_video", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-max-2024-11-19", "name": "qwen-vl-max-2024-11-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-nothinking", "name": "deepseek-v3.1-nothinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedance-1-0-lite-i2v-250428", "name": "doubao-seedance-1-0-lite-i2v-250428", "owned_by": "doubao-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4.1-thinking", "name": "grok-4.1-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_elements_submit", "name": "kling_multi_elements_submit", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-turbo-2024-09-19", "name": "qwen-turbo-2024-09-19", "owned_by": "ali", 
"input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "imagen4", "name": "imagen4", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4-ssvip", "name": "grok-4-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwq-32b-preview", "name": "qwq-32b-preview", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-haiku-4-5-20251001-thinking", "name": "claude-haiku-4-5-20251001-thinking", "owned_by": "vertex-ai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clo45thinking-20251101", "name": "dmxapi-clo45thinking-20251101", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-ssvip", "name": "gpt-5.1-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upscale_4x", "name": "mj_fast_upscale_4x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-07-23", "name": "qwen-plus-2024-07-23", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-code", "name": "claude-code", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_high_variation", "name": "mj_relax_high_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4-0520", "name": "glm-4-0520", "owned_by": "coze", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "claude-haiku-4-5-ssvip", "name": "claude-haiku-4-5-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-image-1-dmx01", "name": "gpt-image-1-dmx01", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_prompt_analyzer_extended", "name": "mj_relax_prompt_analyzer_extended", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upscale_4x", 
"name": "mj_turbo_upscale_4x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-z1-flash", "name": "glm-z1-flash", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "hunyuan-t1-20250321", "name": "hunyuan-t1-20250321", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-audio-turbo-latest", "name": "qwen-audio-turbo-latest", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seededit-3-0-i2i-250628", "name": "doubao-seededit-3-0-i2i-250628", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upscale", "name": "mj_relax_upscale", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upscale", "name": "mj_turbo_upscale", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_edits", "name": "mj_turbo_edits", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-02-06", "name": "qwen-plus-2024-02-06", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_describe", "name": "mj_turbo_describe", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "music-2.0", "name": "music-2.0", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-1.8b-chat", "name": "qwen-1.8b-chat", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedance-1-0-lite-t2v-250428", "name": "doubao-seedance-1-0-lite-t2v-250428", "owned_by": "doubao-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-ssvip", "name": "gpt-5-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_reroll", "name": "mj_turbo_reroll", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 
} }, { "id": "minimax-hailuo-2.3", "name": "minimax-hailuo-2.3", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-coder-3b-instruct", "name": "qwen2.5-coder-3b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-embedding-vision-250615", "name": "doubao-embedding-vision-250615", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-3-pro-preview-ssvip", "name": "gemini-3-pro-preview-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_pic_reader", "name": "mj_fast_pic_reader", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-12-20", "name": "qwen-plus-2024-12-20", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-cl37s0219", "name": "dmxapi-cl37s0219", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-pro-ssvip", "name": "gemini-2.5-pro-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_prompt_analyzer_extended", "name": "mj_fast_prompt_analyzer_extended", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upscale_subtle", "name": "mj_turbo_upscale_subtle", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "multimodal-embedding-v1", "name": "multimodal-embedding-v1", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "abab7-chat-preview", "name": "abab7-chat-preview", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-r1-70b", "name": "dmxapi-deepseek-r1-70b", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-v3", "name": "dmxapi-deepseek-v3", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-high", "name": "gpt-5-high", "owned_by": "custom", "input_modalities": [ "TEXT" 
], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_prompt_analyzer", "name": "mj_turbo_prompt_analyzer", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "musesteamer-air-image", "name": "musesteamer-air-image", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_upload", "name": "mj_turbo_upload", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-max-2024-02-01", "name": "qwen-vl-max-2024-02-01", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_imagine", "name": "mj_fast_imagine", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_upscale_4x", "name": "mj_relax_upscale_4x", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-09-19", "name": "qwen-plus-2024-09-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen3-omni-flash", "name": "qwen3-omni-flash", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upscale_subtle", "name": "mj_fast_upscale_subtle", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-plus-2023-12-01", "name": "qwen-vl-plus-2023-12-01", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedance-1-0-pro-fast-251015", "name": "doubao-seedance-1-0-pro-fast-251015", "owned_by": "doubao-video", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_multi_image2image", "name": "kling_multi_image2image", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-imagine-0.9", "name": "grok-imagine-0.9", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kat-coder-pro-v1-free", "name": "kat-coder-pro-v1-free", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, 
{ "id": "nano-banana-2", "name": "nano-banana-2", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-audio-turbo", "name": "qwen-audio-turbo", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-2024-09-19", "name": "qwen-max-2024-09-19", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-math-1.5b-instruct", "name": "qwen2-math-1.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seed-1-6-thinking-250715", "name": "doubao-seed-1-6-thinking-250715", "owned_by": "volcengine", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-3-pro-preview-11-2025", "name": "gemini-3-pro-preview-11-2025", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-chat-v1", "name": "qwen-vl-chat-v1", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "chat-seedream-3.0", "name": "chat-seedream-3.0", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-gem25-0506", "name": "dmxapi-gem25-0506", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "seedgeminipro-liu", "name": "seedgeminipro-liu", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "speech-02-turbo", "name": "speech-02-turbo", "owned_by": "minimax", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-v3-fast", "name": "dmxapi-deepseek-v3-fast", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4.1-non-thinking", "name": "grok-4.1-non-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_lip_sync", "name": "kling_lip_sync", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_video", "name": "mj_turbo_video", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", 
"tags": [], "created": 1626777600 } }, { "id": "mj_turbo_blend", "name": "mj_turbo_blend", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_shorten", "name": "mj_relax_shorten", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "sora-2-hd-15s-chat", "name": "sora-2-hd-15s-chat", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_imagine", "name": "mj_turbo_imagine", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-codex-high", "name": "gpt-5-codex-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_blend", "name": "mj_relax_blend", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-0.5b-instruct", "name": "qwen2.5-0.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "yi-spark", "name": "yi-spark", "owned_by": "lingyiwanwu", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedream-4-5-251128", "name": "doubao-seedream-4-5-251128", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-cl35s1022", "name": "dmxapi-cl35s1022", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_zoom", "name": "mj_relax_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-vl-max-2024-10-30", "name": "qwen-vl-max-2024-10-30", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_turbo_modal", "name": "mj_turbo_modal", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2.5-1.5b-instruct", "name": "qwen2.5-1.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "kling_image_expand", "name": "kling_image_expand", "owned_by": "kling", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", 
"tags": [], "created": 1626777600 } }, { "id": "mj_turbo_pic_reader", "name": "mj_turbo_pic_reader", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen2-0.5b-instruct", "name": "qwen2-0.5b-instruct", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-r1-plus", "name": "deepseek-r1-plus", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-r1-search", "name": "dmxapi-deepseek-r1-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-codex-medium", "name": "gpt-5-codex-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-plus-2024-08-16", "name": "qwen-math-plus-2024-08-16", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-turbo-2024-06-24", "name": "qwen-turbo-2024-06-24", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-deepseek-r1-long", "name": "dmxapi-deepseek-r1-long", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seedream-3-0-t2i-250415", "name": "doubao-seedream-3-0-t2i-250415", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-image-1-dmx00", "name": "gpt-image-1-dmx00", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_custom_zoom", "name": "mj_fast_custom_zoom", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-turbo-2024-02-06", "name": "qwen-turbo-2024-02-06", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.2-cc", "name": "deepseek-v3.2-cc", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_pan", "name": "mj_fast_pan", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_variation", "name": "mj_relax_variation", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2024-11-27", "name": "qwen-plus-2024-11-27", "owned_by": "ali", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-seed-1-6-251015", "name": "doubao-seed-1-6-251015", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_relax_background_eraser", "name": "mj_relax_background_eraser", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "step-1o-vision-32k", "name": "step-1o-vision-32k", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_upscale_creative", "name": "mj_fast_upscale_creative", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "mj_fast_video", "name": "mj_fast_video", "owned_by": "midjourney", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "dmxapi-clo41think-20250805", "name": "dmxapi-clo41think-20250805", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "glm-4.6-thinking", "name": "glm-4.6-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "veo2", "name": "veo2", "owned_by": "veo2", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749630209000 } }, { "id": "t2v-01-director", "name": "t2v-01-director", "owned_by": "T2V-01-Director", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749633859000 } }, { "id": "doubao-seedance-1.0-pro", "name": "doubao-seedance-1.0-pro", "owned_by": "Doubao-Seedance-1.0-pro", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750407694000 } }, { "id": "deepseek-v3-250324", "name": "deepseek-v3-250324", "owned_by": "deepseek-v3-250324", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751367328000 } }, { "id": "doubao-tts01", "name": "doubao-tts01", "owned_by": "Doubao-tts01", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751812612000 } }, { "id": "gpt-o1", "name": "gpt-o1", "owned_by": "gpt-o1", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1751952965000 } }, { "id": "minimax-s2v-01", "name": 
"minimax-s2v-01", "owned_by": "minimax-S2V-01", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752717517000 } }, { "id": "aionly-kld3.5-sonnet-v2", "name": "aionly-kld3.5-sonnet-v2", "owned_by": "aionly-kld3.5-sonnet-v2", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749608781000 } }, { "id": "moonshot-kimi-k2-instruct", "name": "moonshot-kimi-k2-instruct", "owned_by": "Moonshot-Kimi-K2-Instruct", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753262804000 } }, { "id": "claude-sonnet4", "name": "claude-sonnet4", "owned_by": "claude-sonnet4", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749608781000 } }, { "id": "veo-3.0-generate-001", "name": "veo-3.0-generate-001", "owned_by": "veo-3.0-generate-001", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749630209000 } }, { "id": "qianw3-30b", "name": "qianw3-30b", "owned_by": "qianw3-30b", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754037784000 } }, { "id": "ah-claude-sonnet4", "name": "ah-claude-sonnet4", "owned_by": "ah-claude-sonnet4", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754068787000 } }, { "id": "deepseek-v3-1-250821", "name": "deepseek-v3-1-250821", "owned_by": "deepseek-v3-1-250821", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755859341000 } }, { "id": "seedream4.0", "name": "seedream4.0", "owned_by": "Seedream4.0", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757390935000 } }, { "id": "doubao-1-5-thinking-pro-m-250428", "name": "doubao-1-5-thinking-pro-m-250428", "owned_by": "doubao-1-5-thinking-pro-m-250428", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758159440000 } }, { "id": "gemini-2.5-flash-image-text", "name": "gemini-2.5-flash-image-text", "owned_by": "gemini-2.5-flash-image-text", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760521818000 } }, { "id": "veo-3.0-fast-generate-001", "name": "veo-3.0-fast-generate-001", "owned_by": "veo-3.0-fast-generate-001", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760609641000 } }, { "id": "doubao-seed-1-6-lite-251015", "name": "doubao-seed-1-6-lite-251015", "owned_by": "doubao-seed-1-6-lite-251015", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760668631000 } }, { "id": "gpt-5-codex-azure", "name": "gpt-5-codex-azure", "owned_by": "gpt-5-codex-azure", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": 
"api", "tags": [], "created": 1760331982000 } }, { "id": "gpt-5-pro-openai", "name": "gpt-5-pro-openai", "owned_by": "gpt-5-pro-openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761624065000 } }, { "id": "claude-sonnet-4-5-20250929-02", "name": "claude-sonnet-4-5-20250929-02", "owned_by": "claude-sonnet-4-5-20250929-02", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759193970000 } }, { "id": "claude-haiku-4-5-20251001-02", "name": "claude-haiku-4-5-20251001-02", "owned_by": "claude-haiku-4-5-20251001-02", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759193970000 } }, { "id": "gemini-3-pro-image-preview-e", "name": "gemini-3-pro-image-preview-e", "owned_by": "gemini-3-pro-image-preview-e", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763959055000 } }, { "id": "gpt-image-1-2025-04", "name": "gpt-image-1-2025-04", "owned_by": "gpt-image-1-2025-04", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755581853000 } }, { "id": "deepseek-r1-search", "name": "deepseek-r1-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3-20250324", "name": "deepseek-v3-20250324", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3-search", "name": "deepseek-v3-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-thinking-*", "name": "gemini-2.5-flash-thinking-*", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-thinking-24576", "name": "gemini-2.5-flash-thinking-24576", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-thinking-512", "name": "gemini-2.5-flash-thinking-512", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-pro-flatfee", "name": "gemini-2.5-pro-flatfee", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-3-pro-image-preview-flatfee", "name": "gemini-3-pro-image-preview-flatfee", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-audio-preview-2024-10-01", "name": "gpt-4o-audio-preview-2024-10-01", "owned_by": "openai", "input_modalities": [ "TEXT" 
], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-2025-08-07-high", "name": "gpt-5-2025-08-07-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-2025-08-07-low", "name": "gpt-5-2025-08-07-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-2025-08-07-medium", "name": "gpt-5-2025-08-07-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-2025-08-07-minimal", "name": "gpt-5-2025-08-07-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-low", "name": "gpt-5-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-medium", "name": "gpt-5-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-2025-08-07-high", "name": "gpt-5-mini-2025-08-07-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-2025-08-07-low", "name": "gpt-5-mini-2025-08-07-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-2025-08-07-medium", "name": "gpt-5-mini-2025-08-07-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-2025-08-07-minimal", "name": "gpt-5-mini-2025-08-07-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-high", "name": "gpt-5-mini-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-low", "name": "gpt-5-mini-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-medium", "name": "gpt-5-mini-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-mini-minimal", "name": "gpt-5-mini-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-minimal", "name": 
"gpt-5-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-2025-08-07-high", "name": "gpt-5-nano-2025-08-07-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-2025-08-07-low", "name": "gpt-5-nano-2025-08-07-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-2025-08-07-medium", "name": "gpt-5-nano-2025-08-07-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-2025-08-07-minimal", "name": "gpt-5-nano-2025-08-07-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-high", "name": "gpt-5-nano-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-low", "name": "gpt-5-nano-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-medium", "name": "gpt-5-nano-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-nano-minimal", "name": "gpt-5-nano-minimal", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-audio", "name": "gpt-audio", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-audio-2025-08-28", "name": "gpt-audio-2025-08-28", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-realtime", "name": "gpt-realtime", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-realtime-2025-08-28", "name": "gpt-realtime-2025-08-28", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-2024-12-17-high", "name": "o1-2024-12-17-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-2024-12-17-low", "name": "o1-2024-12-17-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 
1626777600 } }, { "id": "o1-2024-12-17-medium", "name": "o1-2024-12-17-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-high", "name": "o1-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-low", "name": "o1-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-medium", "name": "o1-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-2025-01-31-high", "name": "o3-mini-2025-01-31-high", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-2025-01-31-low", "name": "o3-mini-2025-01-31-low", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-2025-01-31-medium", "name": "o3-mini-2025-01-31-medium", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-low", "name": "o3-mini-low", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-medium", "name": "o3-mini-medium", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-low", "name": "o4-mini-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-medium", "name": "o4-mini-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-250821", "name": "deepseek-v3.1-250821", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-think-250821", "name": "deepseek-v3.1-think-250821", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-3.5-turbo-instruct-0914", "name": "gpt-3.5-turbo-instruct-0914", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-35-turbo", "name": "gpt-35-turbo", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"gpt-35-turbo-instruct", "name": "gpt-35-turbo-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4.1-mini-2024-05-14", "name": "gpt-4.1-mini-2024-05-14", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-audio-preview-2025-06-03", "name": "gpt-4o-audio-preview-2025-06-03", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-realtime-preview-2024-12-17", "name": "gpt-4o-realtime-preview-2024-12-17", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-search-preview-2025-03-11", "name": "gpt-4o-search-preview-2025-03-11", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-high", "name": "gpt-5.1-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-mini-all", "name": "o1-mini-all", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-preview-all", "name": "o1-preview-all", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-2025-04-16-high", "name": "o3-2025-04-16-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-2025-04-16-low", "name": "o3-2025-04-16-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-2025-04-16-medium", "name": "o3-2025-04-16-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-high", "name": "o3-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-low", "name": "o3-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-medium", "name": "o3-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "babbage-002", "name": "babbage-002", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, 
{ "id": "deepseek-ocr-251023", "name": "deepseek-ocr-251023", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-tng-r1t2-chimera", "name": "deepseek-tng-r1t2-chimera", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-250821-thinking", "name": "deepseek-v3.1-250821-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.1-terminus-thinking", "name": "deepseek-v3.1-terminus-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.2-exp-250929", "name": "deepseek-v3.2-exp-250929", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "deepseek-v3.2-exp-250929-thinking", "name": "deepseek-v3.2-exp-250929-thinking", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-3.5-turbo-instruct-09-14", "name": "gpt-3.5-turbo-instruct-09-14", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-64k-output-alpha", "name": "gpt-4o-64k-output-alpha", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-mini-search-preview-2025-03-11", "name": "gpt-4o-mini-search-preview-2025-03-11", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-4o-transcribe-diarize", "name": "gpt-4o-transcribe-diarize", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-pro-2025-10-06", "name": "gpt-5-pro-2025-10-06", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-search", "name": "gpt-5-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-search-api", "name": "gpt-5-search-api", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-search-api-2025-10-14", "name": "gpt-5-search-api-2025-10-14", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"gpt-5.1-2025-11-13-high", "name": "gpt-5.1-2025-11-13-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-2025-11-13-low", "name": "gpt-5.1-2025-11-13-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-2025-11-13-medium", "name": "gpt-5.1-2025-11-13-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-2025-11-13-none", "name": "gpt-5.1-2025-11-13-none", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-low", "name": "gpt-5.1-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-medium", "name": "gpt-5.1-medium", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-none", "name": "gpt-5.1-none", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-search", "name": "gpt-5.1-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-audio-mini", "name": "gpt-audio-mini", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-audio-mini-2025-10-06", "name": "gpt-audio-mini-2025-10-06", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-realtime-mini", "name": "gpt-realtime-mini", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-realtime-mini-2025-10-06", "name": "gpt-realtime-mini-2025-10-06", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt‑4o audio preview", "name": "gpt‑4o audio preview", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-2-vision", "name": "grok-2-vision", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-mini-beta-high", "name": "grok-3-mini-beta-high", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], 
"created": 1626777600 } }, { "id": "grok-3-mini-beta-low", "name": "grok-3-mini-beta-low", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-mini-beta-medium", "name": "grok-3-mini-beta-medium", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-mini-fast-beta-high", "name": "grok-3-mini-fast-beta-high", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-mini-fast-beta-low", "name": "grok-3-mini-fast-beta-low", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-mini-fast-beta-medium", "name": "grok-3-mini-fast-beta-medium", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-3-nx", "name": "grok-3-nx", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4-0709-search", "name": "grok-4-0709-search", "owned_by": "xai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4-fast-reasoning-search", "name": "grok-4-fast-reasoning-search", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o1-pro-2025-03-19", "name": "o1-pro-2025-03-19", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-deep-research-2025-06-26", "name": "o3-deep-research-2025-06-26", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-mini-high-2025-01-31", "name": "o3-mini-high-2025-01-31", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o3-pro-all", "name": "o3-pro-all", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-2025-04-16-high", "name": "o4-mini-2025-04-16-high", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-2025-04-16-low", "name": "o4-mini-2025-04-16-low", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-2025-04-16-medium", "name": "o4-mini-2025-04-16-medium", "owned_by": "custom", "input_modalities": [ 
"TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "o4-mini-deep-research-2025-06-26", "name": "o4-mini-deep-research-2025-06-26", "owned_by": "openai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "computer-use-preview-2025-03-11", "name": "computer-use-preview-2025-03-11", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1-5-lite-32k-250115", "name": "doubao-1-5-lite-32k-250115", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1-5-thinking-pro-250415", "name": "doubao-1-5-thinking-pro-250415", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1-5-ui-tars-250428", "name": "doubao-1-5-ui-tars-250428", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1-5-vision-pro-32k-250115", "name": "doubao-1-5-vision-pro-32k-250115", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-1.5-vision-lite-250315", "name": "doubao-1.5-vision-lite-250315", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-lite-128k-240828", "name": "doubao-lite-128k-240828", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-lite-32k-240828", "name": "doubao-lite-32k-240828", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "doubao-pro-32k-241215", "name": "doubao-pro-32k-241215", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-lite-thinking-*", "name": "gemini-2.5-flash-lite-thinking-*", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-pro-thinking-*", "name": "gemini-2.5-pro-thinking-*", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5-flatfee", "name": "gpt-5-flatfee", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gpt-5.1-flatfee", "name": "gpt-5.1-flatfee", "owned_by": "custom", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "jina-reranker-v2-base-multilingual", "name": "jina-reranker-v2-base-multilingual", "owned_by": "jina", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "llama-4-maverick-17b-128e-instruct", "name": "llama-4-maverick-17b-128e-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "llama-4-scout-17b-16e-instruct", "name": "llama-4-scout-17b-16e-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "phi-4-mini-instruct", "name": "phi-4-mini-instruct", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "phi-4-mini-reasoning", "name": "phi-4-mini-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "phi-4-reasoning", "name": "phi-4-reasoning", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-plus-1106", "name": "qwen-coder-plus-1106", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-coder-turbo-0919", "name": "qwen-coder-turbo-0919", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-plus-0919", "name": "qwen-math-plus-0919", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-math-turbo-0919", "name": "qwen-math-turbo-0919", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-0403", "name": "qwen-max-0403", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-0428", "name": "qwen-max-0428", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-max-0919", "name": "qwen-max-0919", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-0919", "name": "qwen-plus-0919", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "qwen-plus-2025-07-14", 
"name": "qwen-plus-2025-07-14", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "grok-4-reverse", "name": "grok-4-reverse", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "moonshot-v1-auto", "name": "moonshot-v1-auto", "owned_by": "moonshot", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1765198274 } }, { "id": "qwen3-omni-flash-2025-12-01", "name": "qwen3-omni-flash-2025-12-01", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764736816 } }, { "id": "qwen3-omni-flash-realtime-2025-12-01", "name": "qwen3-omni-flash-realtime-2025-12-01", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764736850 } }, { "id": "qwen3-livetranslate-flash-2025-12-01", "name": "qwen3-livetranslate-flash-2025-12-01", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764737030 } }, { "id": "qwen3-livetranslate-flash", "name": "qwen3-livetranslate-flash", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764836565 } }, { "id": "qwen-plus-2025-12-01", "name": "qwen-plus-2025-12-01", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764588482 } }, { "id": "qwen3-tts-vc-realtime-2025-11-27", "name": "qwen3-tts-vc-realtime-2025-11-27", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764126161 } }, { "id": "qwen3-tts-flash-2025-11-27", "name": "qwen3-tts-flash-2025-11-27", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764125897 } }, { "id": "qwen3-tts-flash-realtime-2025-11-27", "name": "qwen3-tts-flash-realtime-2025-11-27", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764125637 } }, { "id": "qwen-plus-2025-11-05", "name": "qwen-plus-2025-11-05", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764071954 } }, { "id": "qwen-mt-lite", "name": "qwen-mt-lite", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763619345 } }, { "id": "qwen-vl-ocr-2025-11-20", "name": "qwen-vl-ocr-2025-11-20", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763606696 } }, { "id": "qwen-mt-flash", "name": "qwen-mt-flash", "owned_by": 
"system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762432503 } }, { "id": "gui-plus", "name": "gui-plus", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762330077 } }, { "id": "qwen-image-edit-plus-2025-10-30", "name": "qwen-image-edit-plus-2025-10-30", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761808837 } }, { "id": "qwen-image-edit-plus", "name": "qwen-image-edit-plus", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763973984 } }, { "id": "qwen-deep-search-planning", "name": "qwen-deep-search-planning", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761806022 } }, { "id": "qwen3-asr-flash-realtime-2025-10-27", "name": "qwen3-asr-flash-realtime-2025-10-27", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022126 } }, { "id": "qwen3-asr-flash-realtime", "name": "qwen3-asr-flash-realtime", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022111 } }, { "id": "qwen3-vl-flash", "name": "qwen3-vl-flash", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764818715 } }, { "id": "qwen3-vl-flash-2025-10-15", "name": "qwen3-vl-flash-2025-10-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760688802 } }, { "id": "qwen3-tts-flash", "name": "qwen3-tts-flash", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022176 } }, { "id": "qwen3-tts-flash-2025-09-18", "name": "qwen3-tts-flash-2025-09-18", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022187 } }, { "id": "qwen3-tts-flash-realtime-2025-09-18", "name": "qwen3-tts-flash-realtime-2025-09-18", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022209 } }, { "id": "qwen3-tts-flash-realtime", "name": "qwen3-tts-flash-realtime", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022199 } }, { "id": "qwen3-omni-flash-realtime", "name": "qwen3-omni-flash-realtime", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761550537 } }, { "id": "qwen3-omni-flash-2025-09-15", "name": "qwen3-omni-flash-2025-09-15", "owned_by": "system", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761550514 } }, { "id": "qwen3-omni-flash-realtime-2025-09-15", "name": "qwen3-omni-flash-realtime-2025-09-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761550561 } }, { "id": "qwen3-s2s-flash-realtime-2025-09-22", "name": "qwen3-s2s-flash-realtime-2025-09-22", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022163 } }, { "id": "qwen3-livetranslate-flash-realtime", "name": "qwen3-livetranslate-flash-realtime", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022138 } }, { "id": "qwen3-livetranslate-flash-realtime-2025-09-22", "name": "qwen3-livetranslate-flash-realtime-2025-09-22", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022151 } }, { "id": "qwen2.5-14b-instruct-1m", "name": "qwen2.5-14b-instruct-1m", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760422035 } }, { "id": "qwen2.5-7b-instruct-1m", "name": "qwen2.5-7b-instruct-1m", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760422022 } }, { "id": "qwen-plus-2025-01-25", "name": "qwen-plus-2025-01-25", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760421714 } }, { "id": "qwq-plus-2025-03-05", "name": "qwq-plus-2025-03-05", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760421657 } }, { "id": "qvq-max", "name": "qvq-max", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760421485 } }, { "id": "qwen-omni-turbo", "name": "qwen-omni-turbo", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760688207 } }, { "id": "qwen-vl-max-2025-04-08", "name": "qwen-vl-max-2025-04-08", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760413715 } }, { "id": "qwen-vl-plus-2025-01-25", "name": "qwen-vl-plus-2025-01-25", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760413697 } }, { "id": "qwen-vl-plus-2025-05-07", "name": "qwen-vl-plus-2025-05-07", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760413614 } }, { "id": "qwen-turbo-2025-07-15", "name": "qwen-turbo-2025-07-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760413499 } }, { "id": "qwen3-coder-plus-2025-09-23", "name": "qwen3-coder-plus-2025-09-23", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764776933 } }, { "id": "qwen3-max-2025-09-23", "name": "qwen3-max-2025-09-23", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761227479 } }, { "id": "qwen3-vl-plus-2025-09-23", "name": "qwen3-vl-plus-2025-09-23", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760688714 } }, { "id": "qwen-plus-2025-09-11", "name": "qwen-plus-2025-09-11", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1757644887 } }, { "id": "qwen-vl-plus-2025-08-15", "name": "qwen-vl-plus-2025-08-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755567702 } }, { "id": "qwen-tts-2025-05-22", "name": "qwen-tts-2025-05-22", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763022059 } }, { "id": "qvq-plus", "name": "qvq-plus", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955066 } }, { "id": "qvq-plus-2025-05-15", "name": "qvq-plus-2025-05-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955051 } }, { "id": "qvq-max-2025-05-15", "name": "qvq-max-2025-05-15", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955124 } }, { "id": "qwen-vl-max-2025-04-02", "name": "qwen-vl-max-2025-04-02", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955443 } }, { "id": "deepseek-r1-distill-llama-8b", "name": "deepseek-r1-distill-llama-8b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955686 } }, { "id": "deepseek-r1-distill-qwen-1.5b", "name": "deepseek-r1-distill-qwen-1.5b", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753955733 } }, { "id": "qwen-turbo-0919", "name": "qwen-turbo-0919", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1754012959 } }, { "id": "codeqwen1.5-7b-chat", "name": "codeqwen1.5-7b-chat", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1714377102 } }, { "id": "qwen-max-1201", "name": "qwen-max-1201", 
"owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1714377101 } }, { "id": "qwen-max-0107", "name": "qwen-max-0107", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1714377100 } }, { "id": "meta-llama/llama-prompt-guard-2-86m", "name": "meta-llama/llama-prompt-guard-2-86m", "owned_by": "Meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "metadata": { "source": "api", "tags": [], "created": 1748632165 } }, { "id": "meta-llama/llama-4-maverick-17b-128e-instruct", "name": "meta-llama/llama-4-maverick-17b-128e-instruct", "owned_by": "Meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1743877158 } }, { "id": "llama-3.3-70b-versatile", "name": "llama-3.3-70b-versatile", "owned_by": "Meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1733447754 } }, { "id": "groq/compound", "name": "groq/compound", "owned_by": "Groq", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1756949530 } }, { "id": "playai-tts-arabic", "name": "playai-tts-arabic", "owned_by": "PlayAI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1740682783 } }, { "id": "groq/compound-mini", "name": "groq/compound-mini", "owned_by": "Groq", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1756949707 } }, { "id": "meta-llama/llama-prompt-guard-2-22m", "name": "meta-llama/llama-prompt-guard-2-22m", "owned_by": "Meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "metadata": { "source": "api", "tags": [], "created": 1748632101 } }, { "id": "allam-2-7b", "name": "allam-2-7b", "owned_by": "SDAIA", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "metadata": { "source": "api", "tags": [], "created": 1737672203 } }, { "id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "meta-llama/llama-4-scout-17b-16e-instruct", "owned_by": "Meta", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1743874824 } }, { "id": "togethercomputer/refuel-llm-v2", "name": "togethercomputer/refuel-llm-v2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "metadata": { "source": "api", "tags": [], "created": 1747260038 } }, { "id": "cartesia/sonic", "name": "cartesia/sonic", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "cartesia/sonic-2", "name": "cartesia/sonic-2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "meta-llama/llama-guard-7b", "name": "meta-llama/llama-guard-7b", "owned_by": "unknown", "input_modalities": [ 
"TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "togethercomputer/moa-1", "name": "togethercomputer/moa-1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1733856970 } }, { "id": "meta-llama/llamaguard-2-8b", "name": "meta-llama/llamaguard-2-8b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1713429380 } }, { "id": "meta-llama/llama-3.3-70b-instruct-turbo", "name": "meta-llama/llama-3.3-70b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1733466629 } }, { "id": "qwen/qwen2.5-72b-instruct-turbo", "name": "qwen/qwen2.5-72b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1728633510 } }, { "id": "alibaba-nlp/gte-modernbert-base", "name": "alibaba-nlp/gte-modernbert-base", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1747292078 } }, { "id": "kwaivgi/kling-1.6-standard", "name": "kwaivgi/kling-1.6-standard", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884920 } }, { "id": "meta-llama/llama-guard-3-11b-vision-turbo", "name": "meta-llama/llama-guard-3-11b-vision-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1727242489 } }, { "id": "black-forest-labs/flux.2-flex", "name": "black-forest-labs/flux.2-flex", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764090764 } }, { "id": "meta-llama/meta-llama-guard-3-8b", "name": "meta-llama/meta-llama-guard-3-8b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1713429380 } }, { "id": "meta-llama/meta-llama-3.1-70b-instruct-turbo", "name": "meta-llama/meta-llama-3.1-70b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1721603683 } }, { "id": "black-forest-labs/flux.1-dev-lora", "name": "black-forest-labs/flux.1-dev-lora", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1736906515 } }, { "id": "hidream-ai/hidream-i1-full", "name": "hidream-ai/hidream-i1-full", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884837 } }, { "id": "black-forest-labs/flux.1-krea-dev", "name": "black-forest-labs/flux.1-krea-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 
0, "metadata": { "source": "api", "tags": [], "created": 1753940366 } }, { "id": "togethercomputer/moa-1-turbo", "name": "togethercomputer/moa-1-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "lykon/dreamshaper", "name": "lykon/dreamshaper", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884763 } }, { "id": "hidream-ai/hidream-i1-dev", "name": "hidream-ai/hidream-i1-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884844 } }, { "id": "rundiffusion/juggernaut-pro-flux", "name": "rundiffusion/juggernaut-pro-flux", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884821 } }, { "id": "google/imagen-4.0-preview", "name": "google/imagen-4.0-preview", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884778 } }, { "id": "google/imagen-4.0-ultra", "name": "google/imagen-4.0-ultra", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884786 } }, { "id": "google/veo-3.0", "name": "google/veo-3.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884899 } }, { "id": "minimax/hailuo-02", "name": "minimax/hailuo-02", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884967 } }, { "id": "stabilityai/stable-diffusion-3-medium", "name": "stabilityai/stable-diffusion-3-medium", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884829 } }, { "id": "arcee-ai/trinity-mini", "name": "arcee-ai/trinity-mini", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1764549884 } }, { "id": "minimax/video-01-director", "name": "minimax/video-01-director", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884960 } }, { "id": "deepseek-ai/deepseek-r1-0528-tput", "name": "deepseek-ai/deepseek-r1-0528-tput", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1750184204 } }, { "id": "google/flash-image-2.5", "name": "google/flash-image-2.5", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884801 } }, { "id": "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", "name": "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ 
"TEXT" ], "context_window": 1048576, "metadata": { "source": "api", "tags": [], "created": 1743878353 } }, { "id": "servicenow-ai/apriel-1.5-15b-thinker", "name": "servicenow-ai/apriel-1.5-15b-thinker", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1759435628 } }, { "id": "scb10x/scb10x-typhoon-2-1-gemma3-12b", "name": "scb10x/scb10x-typhoon-2-1-gemma3-12b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1749574497 } }, { "id": "hidream-ai/hidream-i1-fast", "name": "hidream-ai/hidream-i1-fast", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884851 } }, { "id": "bytedance/seedance-1.0-pro", "name": "bytedance/seedance-1.0-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884879 } }, { "id": "google/veo-3.0-fast-audio", "name": "google/veo-3.0-fast-audio", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884907 } }, { "id": "vidu/vidu-q1", "name": "vidu/vidu-q1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884996 } }, { "id": "intfloat/multilingual-e5-large-instruct", "name": "intfloat/multilingual-e5-large-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 514, "metadata": { "source": "api", "tags": [], "created": 1745513588 } }, { "id": "meta-llama/meta-llama-3.1-8b-instruct-turbo", "name": "meta-llama/meta-llama-3.1-8b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1741298134 } }, { "id": "bytedance-seed/seedream-3.0", "name": "bytedance-seed/seedream-3.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884740 } }, { "id": "bytedance-seed/seedream-4.0", "name": "bytedance-seed/seedream-4.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884757 } }, { "id": "meta-llama/meta-llama-3.1-405b-instruct-lite-pro", "name": "meta-llama/meta-llama-3.1-405b-instruct-lite-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 4096, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "meta-llama/llama-3.2-3b-instruct-turbo", "name": "meta-llama/llama-3.2-3b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1727229064 } }, { "id": "black-forest-labs/flux.1-kontext-max", "name": "black-forest-labs/flux.1-kontext-max", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": 
"api", "tags": [], "created": 0 } }, { "id": "ideogram/ideogram-3.0", "name": "ideogram/ideogram-3.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884808 } }, { "id": "mixedbread-ai/mxbai-rerank-large-v2", "name": "mixedbread-ai/mxbai-rerank-large-v2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1747942382 } }, { "id": "black-forest-labs/flux.1-kontext-dev", "name": "black-forest-labs/flux.1-kontext-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750904593 } }, { "id": "black-forest-labs/flux.1.1-pro", "name": "black-forest-labs/flux.1.1-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "meta-llama/meta-llama-3-70b-instruct-turbo", "name": "meta-llama/meta-llama-3-70b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "black-forest-labs/flux.1-schnell-free", "name": "black-forest-labs/flux.1-schnell-free", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "black-forest-labs/flux.1-kontext-pro", "name": "black-forest-labs/flux.1-kontext-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "openai/sora-2", "name": "openai/sora-2", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760480340 } }, { "id": "kwaivgi/kling-2.1-standard", "name": "kwaivgi/kling-2.1-standard", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884940 } }, { "id": "google/imagen-4.0-fast", "name": "google/imagen-4.0-fast", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884793 } }, { "id": "kwaivgi/kling-2.1-master", "name": "kwaivgi/kling-2.1-master", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884953 } }, { "id": "google/veo-3.0-audio", "name": "google/veo-3.0-audio", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884892 } }, { "id": "rundiffusion/juggernaut-lightning-flux", "name": "rundiffusion/juggernaut-lightning-flux", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884814 } }, { "id": "google/veo-2.0", "name": "google/veo-2.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884886 } }, { "id": "google/veo-3.0-fast", "name": "google/veo-3.0-fast", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884913 } }, { "id": "kwaivgi/kling-2.0-master", "name": "kwaivgi/kling-2.0-master", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884933 } }, { "id": "pixverse/pixverse-v5", "name": "pixverse/pixverse-v5", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884975 } }, { "id": "stabilityai/stable-diffusion-xl-base-1.0", "name": "stabilityai/stable-diffusion-xl-base-1.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884771 } }, { "id": "openai/sora-2-pro", "name": "openai/sora-2-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1760480692 } }, { "id": "bytedance/seedance-1.0-lite", "name": "bytedance/seedance-1.0-lite", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884873 } }, { "id": "kwaivgi/kling-1.6-pro", "name": "kwaivgi/kling-1.6-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884926 } }, { "id": "vidu/vidu-2.0", "name": "vidu/vidu-2.0", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759885002 } }, { "id": "kwaivgi/kling-2.1-pro", "name": "kwaivgi/kling-2.1-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759884948 } }, { "id": "virtue-ai/virtueguard-text-lite", "name": "virtue-ai/virtueguard-text-lite", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1753221263 } }, { "id": "meta-llama/meta-llama-3-8b-instruct-lite", "name": "meta-llama/meta-llama-3-8b-instruct-lite", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "qwen/qwen3-235b-a22b-fp8-tput", "name": "qwen/qwen3-235b-a22b-fp8-tput", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "api", "tags": [], "created": 1747854468 } }, { "id": "togethercomputer/refuel-llm-v2-small", "name": "togethercomputer/refuel-llm-v2-small", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1747174868 } }, { "id": "arize-ai/qwen-2-1.5b-instruct", "name": "arize-ai/qwen-2-1.5b-instruct", "owned_by": "unknown", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1745522693 } }, { "id": "qwen/qwen2.5-7b-instruct-turbo", "name": "qwen/qwen2.5-7b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1728671048 } }, { "id": "hexgrad/kokoro-82m", "name": "hexgrad/kokoro-82m", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758585307 } }, { "id": "google/gemini-3-pro-image", "name": "google/gemini-3-pro-image", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763662095 } }, { "id": "mercor/cwm", "name": "mercor/cwm", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1763423489 } }, { "id": "mistralai/ministral-3-14b-instruct-2512", "name": "mistralai/ministral-3-14b-instruct-2512", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1764641096 } }, { "id": "qwen/qwen3-235b-a22b-instruct-2507-tput", "name": "qwen/qwen3-235b-a22b-instruct-2507-tput", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1753218184 } }, { "id": "black-forest-labs/flux.2-pro", "name": "black-forest-labs/flux.2-pro", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764070232 } }, { "id": "black-forest-labs/flux.2-dev", "name": "black-forest-labs/flux.2-dev", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764086551 } }, { "id": "canopylabs/orpheus-3b-0.1-ft", "name": "canopylabs/orpheus-3b-0.1-ft", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1755731205 } }, { "id": "meta-llama/llama-3-70b-chat-hf", "name": "meta-llama/llama-3-70b-chat-hf", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1713429236 } }, { "id": "deepcogito/cogito-v2-1-671b", "name": "deepcogito/cogito-v2-1-671b", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1763064800 } }, { "id": "meta-llama/meta-llama-3.1-405b-instruct-turbo", "name": "meta-llama/meta-llama-3.1-405b-instruct-turbo", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 130815, "metadata": { "source": "api", "tags": [], "created": 1721698359 } }, { "id": "openai/whisper-large-v3", "name": "openai/whisper-large-v3", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748454089 } }, { "id": 
"salesforce/llama-rank-v1", "name": "salesforce/llama-rank-v1", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1723745254 } }, { "id": "togethercomputer/m2-bert-80m-32k-retrieval", "name": "togethercomputer/m2-bert-80m-32k-retrieval", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1699120644 } }, { "id": "meta-llama/llama-3-70b-hf", "name": "meta-llama/llama-3-70b-hf", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1713429267 } }, { "id": "meta-llama/meta-llama-3.1-70b-instruct-reference", "name": "meta-llama/meta-llama-3.1-70b-instruct-reference", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 8192, "metadata": { "source": "api", "tags": [], "created": 1721603250 } }, { "id": "meta-llama/meta-llama-3.1-8b-instruct-reference", "name": "meta-llama/meta-llama-3.1-8b-instruct-reference", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 16384, "metadata": { "source": "api", "tags": [], "created": 1721603207 } }, { "id": "baai/bge-base-en-v1.5", "name": "baai/bge-base-en-v1.5", "owned_by": "unknown", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 512, "metadata": { "source": "api", "tags": [], "created": 1749601524 } }, { "id": "accounts/fireworks/models/flux-1-dev-fp8", "name": "accounts/fireworks/models/flux-1-dev-fp8", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729532889 } }, { "id": "accounts/fireworks/models/flux-kontext-max", "name": "accounts/fireworks/models/flux-kontext-max", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750714611 } }, { "id": "accounts/fireworks/models/flux-kontext-pro", "name": "accounts/fireworks/models/flux-kontext-pro", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750488264 } }, { "id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", "name": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", "owned_by": "sentientfoundation-serverless", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1748467427 } }, { "id": "accounts/fireworks/models/flux-1-schnell-fp8", "name": "accounts/fireworks/models/flux-1-schnell-fp8", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1729535376 } }, { "id": "accounts/fireworks/models/qwen3-embedding-8b", "name": "accounts/fireworks/models/qwen3-embedding-8b", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "api", "tags": [], "created": 1755707090 } }, { "id": "accounts/fireworks/models/deepseek-v3-0324", "name": 
"accounts/fireworks/models/deepseek-v3-0324", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1742827220 } }, { "id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", "name": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1753211090 } }, { "id": "accounts/fireworks/models/qwen3-reranker-8b", "name": "accounts/fireworks/models/qwen3-reranker-8b", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "api", "tags": [], "created": 1759865045 } }, { "id": "accounts/fireworks/models/llama-v3p3-70b-instruct", "name": "accounts/fireworks/models/llama-v3p3-70b-instruct", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1733442103 } }, { "id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", "name": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 128000, "metadata": { "source": "api", "tags": [], "created": 1743392739 } }, { "id": "accounts/fireworks/models/qwen3-235b-a22b", "name": "accounts/fireworks/models/qwen3-235b-a22b", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1745885249 } }, { "id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507", "name": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1753455434 } }, { "id": "accounts/fireworks/models/qwen3-30b-a3b", "name": "accounts/fireworks/models/qwen3-30b-a3b", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1745878133 } }, { "id": "accounts/fireworks/models/mixtral-8x22b-instruct", "name": "accounts/fireworks/models/mixtral-8x22b-instruct", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 65536, "metadata": { "source": "api", "tags": [], "created": 1713375508 } }, { "id": "accounts/fireworks/models/llama4-scout-instruct-basic", "name": "accounts/fireworks/models/llama4-scout-instruct-basic", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "metadata": { "source": "api", "tags": [], "created": 1743878279 } }, { "id": "accounts/fireworks/models/llama4-maverick-instruct-basic", "name": "accounts/fireworks/models/llama4-maverick-instruct-basic", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 1048576, "metadata": { "source": "api", "tags": [], "created": 
1743878495 } }, { "id": "accounts/cogito/models/cogito-671b-v2-p1", "name": "accounts/cogito/models/cogito-671b-v2-p1", "owned_by": "cogito", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1763071906 } }, { "id": "accounts/fireworks/models/deepseek-v3p1", "name": "accounts/fireworks/models/deepseek-v3p1", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1755758988 } }, { "id": "accounts/fireworks/models/deepseek-v3p1-terminus", "name": "accounts/fireworks/models/deepseek-v3p1-terminus", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1758586241 } }, { "id": "accounts/fireworks/models/kimi-k2-instruct-0905", "name": "accounts/fireworks/models/kimi-k2-instruct-0905", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1757018994 } }, { "id": "accounts/fireworks/models/glm-4p6", "name": "accounts/fireworks/models/glm-4p6", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 202752, "metadata": { "source": "api", "tags": [], "created": 1759359059 } }, { "id": "accounts/revolut/models/cross-sell-product-selection-qwen3-coder-30b-v3", "name": "accounts/revolut/models/cross-sell-product-selection-qwen3-coder-30b-v3", "owned_by": "revolut", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758757208 } }, { "id": "accounts/fireworks/models/kimi-k2-thinking", "name": "accounts/fireworks/models/kimi-k2-thinking", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1762458100 } }, { "id": "accounts/fireworks/models/deepseek-r1-0528", "name": "accounts/fireworks/models/deepseek-r1-0528", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1748456377 } }, { "id": "accounts/fireworks/models/qwen3-vl-235b-a22b-instruct", "name": "accounts/fireworks/models/qwen3-vl-235b-a22b-instruct", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1758745689 } }, { "id": "accounts/fireworks/models/qwen3-vl-235b-a22b-thinking", "name": "accounts/fireworks/models/qwen3-vl-235b-a22b-thinking", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1758755532 } }, { "id": "accounts/fireworks/models/minimax-m2", "name": "accounts/fireworks/models/minimax-m2", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" 
], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761548072 } }, { "id": "accounts/fireworks/models/glm-4p5", "name": "accounts/fireworks/models/glm-4p5", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1753809636 } }, { "id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", "name": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1753124424 } }, { "id": "accounts/fireworks/models/deepseek-v3p2", "name": "accounts/fireworks/models/deepseek-v3p2", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 163840, "metadata": { "source": "api", "tags": [], "created": 1764602280 } }, { "id": "accounts/fireworks/models/gpt-oss-120b", "name": "accounts/fireworks/models/gpt-oss-120b", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1754345600 } }, { "id": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct", "name": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1759959171 } }, { "id": "accounts/fireworks/models/qwen3-vl-30b-a3b-thinking", "name": "accounts/fireworks/models/qwen3-vl-30b-a3b-thinking", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 262144, "metadata": { "source": "api", "tags": [], "created": 1759960063 } }, { "id": "accounts/fireworks/models/gpt-oss-20b", "name": "accounts/fireworks/models/gpt-oss-20b", "owned_by": "fireworks", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1754345466 } }, { "id": "accounts/fireworks/models/qwen3-8b", "name": "accounts/fireworks/models/qwen3-8b", "owned_by": "fireworks", "capabilities": [ "FUNCTION_CALL" ], "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 40960, "metadata": { "source": "api", "tags": [], "created": 1745906862 } }, { "id": "mistralai/devstral-2-123b-instruct-2512", "name": "mistralai/devstral-2-123b-instruct-2512", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 735790403 } }, { "id": "meta-llama/meta-llama-3.1-70b-instruct", "name": "meta-llama/meta-llama-3.1-70b-instruct", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "meta-llama/meta-llama-3.1-8b-instruct", "name": "meta-llama/meta-llama-3.1-8b-instruct", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 
1765296494 } }, { "id": "meta-llama/meta-llama-3.1-405b", "name": "meta-llama/meta-llama-3.1-405b", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "openai/gpt-oss-120b-turbo", "name": "openai/gpt-oss-120b-turbo", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 131072, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "stablediffusion", "name": "stablediffusion", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "flux.1-dev", "name": "flux.1-dev", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "tts", "name": "tts", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "mistralai/pixtral-12b-2409", "name": "mistralai/pixtral-12b-2409", "owned_by": "Hyperbolic", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 32768, "metadata": { "source": "api", "tags": [], "created": 1765296494 } }, { "id": "mistral-medium-2505", "name": "mistral-medium-2505", "description": "Our frontier-class multimodal model released May 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-medium-2508", "name": "mistral-medium-2508", "description": "Update on Mistral Medium 3 with improved capabilities.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-medium-latest", "name": "mistral-medium-2508", "description": "Update on Mistral Medium 3 with improved capabilities.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "open-mistral-7b", "name": "open-mistral-7b", "description": "Our first dense model released September 2023.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-tiny-2312", "name": "open-mistral-7b", "description": "Our first dense model released September 2023.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "open-mistral-nemo", "name": "open-mistral-nemo", "description": "Our best multilingual open source model released July 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "open-mistral-nemo-2407", "name": "open-mistral-nemo", "description": "Our best multilingual open source model released July 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], 
"output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-tiny-2407", "name": "open-mistral-nemo", "description": "Our best multilingual open source model released July 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-tiny-latest", "name": "open-mistral-nemo", "description": "Our best multilingual open source model released July 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "pixtral-large-latest", "name": "pixtral-large-2411", "description": "Official pixtral-large-2411 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-large-pixtral-2411", "name": "pixtral-large-2411", "description": "Official pixtral-large-2411 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "devstral-small-2507", "name": "devstral-small-2507", "description": "Our small open-source code-agentic model.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "devstral-medium-2507", "name": "devstral-medium-2507", "description": "Our medium code-agentic model.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-vibe-cli-latest", "name": "mistral-vibe-cli-latest", "description": "Official mistral-vibe-cli-latest Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "devstral-medium-2512", "name": "mistral-vibe-cli-latest", "description": "Official mistral-vibe-cli-latest Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "devstral-2512", "name": "mistral-vibe-cli-latest", "description": "Official mistral-vibe-cli-latest Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "devstral-medium-latest", "name": "mistral-vibe-cli-latest", "description": "Official mistral-vibe-cli-latest Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "labs-devstral-small-2512", "name": "labs-devstral-small-2512", "description": "Official labs-devstral-small-2512 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 
} }, { "id": "devstral-small-latest", "name": "labs-devstral-small-2512", "description": "Official labs-devstral-small-2512 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-small-2506", "name": "mistral-small-2506", "description": "Our latest enterprise-grade small model with the latest version released June 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-small-latest", "name": "mistral-small-2506", "description": "Our latest enterprise-grade small model with the latest version released June 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "magistral-medium-2509", "name": "magistral-medium-2509", "description": "Our frontier-class reasoning model release candidate September 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "magistral-medium-latest", "name": "magistral-medium-2509", "description": "Our frontier-class reasoning model release candidate September 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "magistral-small-2509", "name": "magistral-small-2509", "description": "Our efficient reasoning model released September 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "magistral-small-latest", "name": "magistral-small-2509", "description": "Our efficient reasoning model released September 2025.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-mini-2507", "name": "voxtral-mini-2507", "description": "A mini audio understanding model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-mini-latest", "name": "voxtral-mini-2507", "description": "A mini audio understanding model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-small-2507", "name": "voxtral-small-2507", "description": "A small audio understanding model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-small-latest", "name": "voxtral-small-2507", "description": "A small audio understanding model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], 
"created": 1765296497 } }, { "id": "mistral-large-latest", "name": "mistral-large-2512", "description": "Official mistral-large-2512 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-3b-latest", "name": "ministral-3b-2512", "description": "Ministral 3 (a.k.a. Tinystral) 3B Instruct.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-8b-latest", "name": "ministral-8b-2512", "description": "Ministral 3 (a.k.a. Tinystral) 8B Instruct.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-14b-latest", "name": "ministral-14b-2512", "description": "Ministral 3 (a.k.a. Tinystral) 14B Instruct.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "pixtral-12b-2409", "name": "pixtral-12b-2409", "description": "A 12B model with image understanding capabilities in addition to text.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "pixtral-12b-latest", "name": "pixtral-12b-2409", "description": "A 12B model with image understanding capabilities in addition to text.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-3b-2410", "name": "ministral-3b-2410", "description": "World's best edge model.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-3b-latest", "name": "ministral-3b-2410", "description": "World's best edge model.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-8b-2410", "name": "ministral-8b-2410", "description": "Powerful edge model with extremely high performance/price ratio.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "ministral-8b-latest", "name": "ministral-8b-2410", "description": "Powerful edge model with extremely high performance/price ratio.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "codestral-2501", "name": "codestral-2501", "description": "Our cutting-edge language model for coding released December 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "codestral-2412", "name": "codestral-2501", "description": "Our cutting-edge language model for coding released 
December 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "codestral-2411-rc5", "name": "codestral-2501", "description": "Our cutting-edge language model for coding released December 2024.", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-small-2501", "name": "mistral-small-2501", "description": "Our latest enterprise-grade small model with the latest version released January 2025. ", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-embed", "name": "mistral-embed-2312", "description": "Official mistral-embed-2312 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "codestral-embed", "name": "codestral-embed", "description": "Official codestral-embed Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-moderation-2411", "name": "mistral-moderation-2411", "description": "Official mistral-moderation-2411 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-moderation-latest", "name": "mistral-moderation-2411", "description": "Official mistral-moderation-2411 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-ocr-2505", "name": "mistral-ocr-2505", "description": "Official mistral-ocr-2505 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-ocr-latest", "name": "mistral-ocr-2505", "description": "Official mistral-ocr-2505 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "mistral-ocr-2503", "name": "mistral-ocr-2503", "description": "Official mistral-ocr-2503 Mistral AI model", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-mini-transcribe-2507", "name": "voxtral-mini-transcribe-2507", "description": "A mini transcription model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-mini-2507", "name": "voxtral-mini-transcribe-2507", "description": "A mini transcription model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], 
"context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "voxtral-mini-latest", "name": "voxtral-mini-transcribe-2507", "description": "A mini transcription model released in July 2025", "owned_by": "mistralai", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1765296497 } }, { "id": "hunyuan-standard-32k", "name": "hunyuan-standard-32k", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1713873600 } }, { "id": "hunyuan-turbo-latest", "name": "hunyuan-turbo-latest", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1725447600 } }, { "id": "hunyuan-turbos-20250226", "name": "hunyuan-turbos-20250226", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740571200 } }, { "id": "hunyuan-turbos-latest", "name": "hunyuan-turbos-latest", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1740571200 } }, { "id": "hunyuan-turbos-20250313", "name": "hunyuan-turbos-20250313", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742313600 } }, { "id": "hunyuan-t1-latest", "name": "hunyuan-t1-latest", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1742737064 } }, { "id": "hunyuan-t1-vision", "name": "hunyuan-t1-vision", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747670400 } }, { "id": "hunyuan-turbos-20250515", "name": "hunyuan-turbos-20250515", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1747670400 } }, { "id": "hunyuan-large-vision", "name": "hunyuan-large-vision", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1748275200 } }, { "id": "hunyuan-t1-20250529", "name": "hunyuan-t1-20250529", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749038307 } }, { "id": "hunyuan-turbos-20250604", "name": "hunyuan-turbos-20250604", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749038307 } }, { "id": "hunyuan-turbos-vision-20250619", "name": "hunyuan-turbos-vision-20250619", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750315523 } }, { "id": "hunyuan-t1-vision-20250619", "name": "hunyuan-t1-vision-20250619", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": 
"api", "tags": [], "created": 1750847006 } }, { "id": "hunyuan-turbos-20250716", "name": "hunyuan-turbos-20250716", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1752811180 } }, { "id": "hunyuan-vision-7b-20250720", "name": "hunyuan-vision-7b-20250720", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1753782656 } }, { "id": "hunyuan-large-role-20250822", "name": "hunyuan-large-role-20250822", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756275853 } }, { "id": "hunyuan-t1-20250822", "name": "hunyuan-t1-20250822", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1756285239 } }, { "id": "hunyuan-t1-vision-20250916", "name": "hunyuan-t1-vision-20250916", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1758787205 } }, { "id": "hunyuan-turbos-20250926", "name": "hunyuan-turbos-20250926", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1759028388 } }, { "id": "hunyuan-large-role-latest", "name": "hunyuan-large-role-latest", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1761106774 } }, { "id": "hunyuan-turbos-role-20251114", "name": "hunyuan-turbos-role-20251114", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1763037462 } }, { "id": "hunyuan-2.0-thinking-20251109", "name": "hunyuan-2.0-thinking-20251109", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764559602 } }, { "id": "hunyuan-2.0-instruct-20251111", "name": "hunyuan-2.0-instruct-20251111", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764559602 } }, { "id": "hunyuan-role-latest", "name": "hunyuan-role-latest", "owned_by": "system", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1764847137 } }, { "id": "zai-glm-4.6", "name": "zai-glm-4.6", "owned_by": "Cerebras", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 0 } }, { "id": "gemini-3-pro-preview-thinking-ssvip", "name": "gemini-3-pro-preview-thinking-ssvip", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "jimeng_ti2v_v30_pro", "name": "jimeng_ti2v_v30_pro", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": 
"jimeng_t2v_v30", "name": "jimeng_t2v_v30", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "jimeng_t2v_v30_1080p", "name": "jimeng_t2v_v30_1080p", "owned_by": "custom", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1626777600 } }, { "id": "gemini-2.5-flash-e", "name": "gemini-2.5-flash-e", "owned_by": "gemini-2.5-flash-e", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1749562518000 } }, { "id": "magistral-medium-2509-thinking", "name": "magistral-medium-2509-thinking", "description": "Magistral Medium 2509 (thinking) by EmpirioLabs.\nMagistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical. Context Window: 40,000k\nSupported file type uploads: PDF, XLSX, TXT, PNG, JPG, JPEG", "owned_by": "Empirio Labs AI", "input_modalities": [ "TEXT" ], "output_modalities": [ "TEXT" ], "context_window": 0, "metadata": { "source": "api", "tags": [], "created": 1750288555644 } } ] }