CINXE.COM
AI Proxy Advanced - Plugin | Kong Docs
<!DOCTYPE html> <html lang="en-US" itemscope itemtype="http://schema.org/Article"> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <!-- OneTrust Cookies Consent Notice start for konghq.com --> <script src="https://cdn.cookielaw.org/scripttemplates/otSDKStub.js" type="text/javascript" charset="UTF-8" data-domain-script="2c4de954-6bec-4e93-8086-64cb113f151a"> </script> <script type="text/javascript"> function OptanonWrapper() { } </script> <!-- OneTrust Cookies Consent Notice end for konghq.com --> <!-- Google Tag Manager --> <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer', 'GTM-NL48VKT');</script> <!-- End Google Tag Manager --> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="viewport" content="width=device-width, initial-scale=1"> <title>AI Proxy Advanced - Plugin | Kong Docs</title> <meta name="description" content="Documentation for Kong, the Cloud Connectivity Company for APIs and Microservices."> <meta name="author" content="KongHQ"> <meta property="og:title" content="AI Proxy Advanced - Plugin | Kong Docs"> <meta property="og:site_name" content="Kong Docs"> <!-- use share link for facebook --> <meta property="og:url" content="https://docs.konghq.com"> <meta property="og:description" content="Documentation for Kong, the Cloud Connectivity Company for APIs and Microservices."> <meta property="og:type" content="website"> <meta property="og:locale" content="en_US"> <meta property="og:image" content="https://docs.konghq.com/assets/images/share.png"> <meta name="twitter:card" content="summary_large_image"> <meta name="twitter:site" content="@thekonginc"> <meta name="twitter:creator" content="@thekonginc"> <meta 
name="twitter:url" content="https://docs.konghq.com"> <meta name="twitter:description" content="Documentation for Kong, the Cloud Connectivity Company for APIs and Microservices."> <meta name="twitter:image" content="https://docs.konghq.com/assets/images/share.png"> <meta property="fb:admins" content="227304446"> <meta property="fb:admins" content="576641408"> <meta name="google-site-verification" content="CrU3zp02dNKTe8NSAipL4NCPkrIjDXG8fViTZ-MIzP4"> <script type="application/ld+json"> { "@context": "http://schema.org", "@type": "Organization", "name": "KongHQ", "url": "https://docs.konghq.com", "logo": "https://docs.konghq.com/assets/images/logo.png", "sameAs": [ "https://www.facebook.com/konginc", "https://twitter.com/thekonginc", "https://plus.google.com/+mashape" ] } </script> <!-- Preload assets --> <link rel="dns-prefetch" href="https://cloud.typography.com"> <link rel="dns-prefetch" href="https://dev.visualwebsiteoptimizer.com"> <link rel="dns-prefetch" href="https://cdn.segment.com"> <link rel="icon" type="image/x-icon" href="/assets/images/favicon.ico"> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@docsearch/css@3"> <link rel="canonical" href="https://docs.konghq.com/hub/kong-inc/ai-proxy-advanced/"> <link rel="alternate" hreflang="x-default" href="https://docs.konghq.com/hub/kong-inc/ai-proxy-advanced/"> <link rel="alternate" hreflang="ja" href="https://docs.jp.konghq.com/hub/kong-inc/ai-proxy-advanced/"> <meta name="robots" content="follow,index"> <!-- FontAwesome icon font --> <script src="https://kit.fontawesome.com/1332a92967.js" crossorigin="anonymous"> </script> <script src="/vite/assets/application-D8sXFsvE.js" crossorigin="anonymous" type="module"></script> <link href="/vite/assets/_commonjsHelpers-Cpj98o6Y.js" rel="modulepreload" as="script" crossorigin="anonymous"> <link rel="stylesheet" href="/vite/assets/application-C5Quk452.css" media="screen"> </head> <body id="" data-spy="scroll" data-target="#scroll-sidebar" 
data-offset="350"> <!-- Google Tag Manager (noscript) --> <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-NL48VKT" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript> <!-- End Google Tag Manager (noscript) --> <header class="navbar-v2 closed"> <a class="skip-main" href="#main">Skip to content</a> <!-- uncomment the promo-banner div when adding a new promo banner--> <!--also uncomment the promo banner sections in app/assets/stylesheets/header.less and application.js--> <!-- <div id="promo-banner"> <div class="container"> <div class="closebanner"></div> <strong>2024 API Summit Hackathon: Experiment with API Innovation & AI. Submit by Sept 11 —<a href="https://konghq.com/conferences/kong-summit/hackathon?utm_medium=website&utm_source=docs-konghq-com&utm_campaign=docs-banner">Enter Now →</a> </strong> </div> </div> --> <div class="navbar-content"> <a href="https://konghq.com" class="navbar-brand col col-xl-auto" target="_blank" rel="noopener noreferrer"> <img src="/assets/images/logos/konglogo-dark-theme.svg" alt="Kong Logo" id="kong-logo"> </a> <span class="logo-divider">|</span> <a href="/" class="navbar-brand col col-xl-auto"> <img src="/assets/images/logos/docslogo-dark-theme.svg" alt="Kong Docs Logo" id="kong-docs-logo"> </a> <div class="separator mobile"></div> <div class="search-input-wrapper" id="getkong-algolia-search-input"> </div> <div class="search-results-wrapper"></div> <div class="navbar-items" role="navigation" aria-label="Main menu"> <ul class="navbar-items" role="menubar"> <li id="top-module-list" aria-haspopup="true" role="menuitem" aria-expanded="false" class="navbar-item main-menu-item with-submenu active"> <span tabindex="0" id="docs-link" class="main-menu-item-title">Docs</span> <span class="caret"></span> <ul class="navbar-item-submenu" role="menu"> <div class="submenu-section"> <li role="menuitem" class="docs-dropdown-li"> <a href="/api/" class="docs-dropdown-li__link" tabindex="-1"> <div 
class="docs-dropdown-li__card"> <span class="heading">Explore the API Specs</span> <div class="docs-dropdown-li__card-link"> <img src="/assets/images/landing-page/view-all-api-specs.png" alt="View all API Specs"> <span class="docs-dropdown-li__card-image"> View all API Specs <img src="/assets/images/landing-page/arrow-right.svg" alt="View all API Specs arrow image"> </span> </div> </div> </a> </li> <li role="menuitem" class="docs-dropdown-li" tabindex="-1"> <div class="docs-dropdown-li__section"> <div class="docs-dropdown-li__section-title"> <span class="heading">Documentation</span> </div> <div class="docs-dropdown-li__section-items"> <a class="item item-all" href="/api/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">API Specs</div> </div> </a> <a class="item" href="/gateway/latest/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong Gateway</div> <div class="item__description-desc">Lightweight, fast, and flexible cloud-native API gateway</div> </div> </a> <a class="item" href="/konnect/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong Konnect</div> <div class="item__description-desc">Single platform for SaaS end-to-end connectivity</div> </div> </a> <a class="item" href="/gateway/latest/ai-gateway/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong AI Gateway</div> <div class="item__description-desc">Multi-LLM AI Gateway for GenAI infrastructure</div> </div> </a> <a class="item" href="/mesh/latest/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong Mesh</div> <div class="item__description-desc">Enterprise service mesh based on Kuma and Envoy</div> </div> </a> <a class="item" href="/deck/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">decK</div> <div class="item__description-desc">Helps manage Kong’s configuration in a declarative 
fashion</div> </div> </a> <a class="item" href="/kubernetes-ingress-controller/latest/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong Ingress Controller</div> <div class="item__description-desc">Works inside a Kubernetes cluster and configures Kong to proxy traffic</div> </div> </a> <a class="item" href="/gateway-operator/latest/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">Kong Gateway Operator</div> <div class="item__description-desc">Manage your Kong deployments on Kubernetes using YAML Manifests</div> </div> </a> <a class="item" href="https://docs.insomnia.rest/" tabindex="-1" target="_blank" rel="noopener nofollow noreferrer "> <div class="item__description"> <div class="item__description-title">Insomnia</div> <div class="item__description-desc">Collaborative API development platform</div> </div> </a> </div> </div> </li> </div> </ul> </li> <li role="menuitem" aria-haspopup="true" aria-expanded="false" class="navbar-item main-menu-item with-submenu navbar-item-hub"> <span id="plugin-link" class="main-menu-item-title" tabindex="0">Plugin Hub</span> <span class="caret"></span> <ul class="navbar-item-submenu" role="menu"> <div class="submenu-section"> <li role="menuitem" class="docs-dropdown-li"> <a href="/hub/" class="docs-dropdown-li__link" tabindex="-1"> <div class="docs-dropdown-li__card"> <span class="heading">Explore the Plugin Hub</span> <div class="docs-dropdown-li__card-link"> <img src="/assets/images/landing-page/view-all-plugins.svg" alt="View all plugins"> <span class="docs-dropdown-li__card-image"> View all plugins <img src="/assets/images/landing-page/arrow-right.svg" alt="View all plugins arrow image"> </span> </div> </div> </a> </li> <li role="menuitem" class="docs-dropdown-li"> <div class="docs-dropdown-li__section"> <div class="docs-dropdown-li__section-title"> <span class="heading">Functionality</span> <a href="/hub/" class="view-all" tabindex="-1"> View all <img 
src="/assets/images/landing-page/arrow-right.svg" alt="View all arrow image"> </a> </div> <div class="docs-dropdown-li__section-items"> <a class="item item-all" href="/hub/" tabindex="-1"> <div class="item__description"> <div class="item__description-title">View all plugins</div> </div> </a> <a class="item" href="/hub/?category=ai" tabindex="-1"> <div> <img src="/assets/images/nav/hub/ai.svg" alt="AI's icon"> </div> <div class="item__description"> <div class="item__description-title">AI</div> <div class="item__description-desc">Govern, secure, and control AI traffic with multi-LLM AI Gateway plugins</div> </div> </a> <a class="item" href="/hub/?category=authentication" tabindex="-1"> <div> <img src="/assets/images/nav/hub/lock_person.svg" alt="Authentication's icon"> </div> <div class="item__description"> <div class="item__description-title">Authentication</div> <div class="item__description-desc">Protect your services with an authentication layer</div> </div> </a> <a class="item" href="/hub/?category=security" tabindex="-1"> <div> <img src="/assets/images/nav/hub/shield.svg" alt="Security's icon"> </div> <div class="item__description"> <div class="item__description-title">Security</div> <div class="item__description-desc">Protect your services with additional security layer</div> </div> </a> <a class="item" href="/hub/?category=traffic-control" tabindex="-1"> <div> <img src="/assets/images/nav/hub/route.svg" alt="Traffic Control's icon"> </div> <div class="item__description"> <div class="item__description-title">Traffic Control</div> <div class="item__description-desc">Manage, throttle and restrict inbound and outbound API traffic</div> </div> </a> <a class="item" href="/hub/?category=serverless" tabindex="-1"> <div> <img src="/assets/images/nav/hub/serverless.svg" alt="Serverless's icon"> </div> <div class="item__description"> <div class="item__description-title">Serverless</div> <div class="item__description-desc">Invoke serverless functions in combination with 
other plugins</div> </div> </a> <a class="item" href="/hub/?category=analytics-monitoring" tabindex="-1"> <div> <img src="/assets/images/nav/hub/bar_chart.svg" alt="Analytics & Monitoring's icon"> </div> <div class="item__description"> <div class="item__description-title">Analytics & Monitoring</div> <div class="item__description-desc">Visualize, inspect and monitor APIs and microservices traffic</div> </div> </a> <a class="item" href="/hub/?category=transformations" tabindex="-1"> <div> <img src="/assets/images/nav/hub/swap_horiz.svg" alt="Transformations's icon"> </div> <div class="item__description"> <div class="item__description-title">Transformations</div> <div class="item__description-desc">Transform request and responses on the fly on Kong</div> </div> </a> <a class="item" href="/hub/?category=logging" tabindex="-1"> <div> <img src="/assets/images/nav/hub/list_alt.svg" alt="Logging's icon"> </div> <div class="item__description"> <div class="item__description-title">Logging</div> <div class="item__description-desc">Log request and response data using the best transport for your infrastructure</div> </div> </a> </div> </div> </li> </div> </ul> </li> <li role="menuitem" class="main-menu-item"> <a href="https://support.konghq.com/" class="navbar-item" target="_blank" rel="noopener nofollow noreferrer ">Support</a> </li> <li role="menuitem" class="main-menu-item"> <a href="https://konghq.com/community/" class="navbar-item" target="_blank" rel="noopener noreferrer">Community</a> </li> <li role="menuitem" class="main-menu-item"> <a href="https://education.konghq.com" class="navbar-item" target="_blank" rel="noopener nofollow noreferrer ">Kong Academy</a> </li> </ul> <a id="top-cta" href="https://konghq.com/contact-sales?utm_source=docs.konghq.com" class="navbar-button" target="_blank" rel="noopener nofollow noreferrer "> Get a Demo </a> <a id="konnect-cta" 
href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=docs&utm_campaign=gateway-konnect&utm_content=top-nav" class="navbar-button" target="_blank" rel="noopener nofollow noreferrer "> Start Free Trial </a> </div> <div id="navbar-menu-toggle-button" class="small-screen-button" aria-label="Toggle navigation"> <div></div> <div></div> <div></div> </div> </div> </header> <!-- BEGIN PAGE OUTPUT --> <div class="search-selector">Plugin Hub</div> <div class="page v2 hub" data-url="/hub/kong-inc/ai-proxy-advanced/"> <div class="page--header-background"></div> <div class="container"> <header class="page-header"> <div class="page-header--nav"> <i class="sidebar-toggle"></i> <ul class="breadcrumbs"> <li class="breadcrumb-item"> <a href="/hub/"> <img src="/assets/images/icons/hub-layout/icn-breadcrumbs.svg" alt="Plugin Hub icon"> </a> </li> <li class="breadcrumb-item"> <a href="/hub/?category=ai">AI</a> </li> <li class="breadcrumb-item active"> <a href="/hub/kong-inc/ai-proxy-advanced/">AI Proxy Advanced</a> </li> <li class="breadcrumb-item"> Introduction </li> <li class="breadcrumb-item active"> <a href="/hub/kong-inc/ai-proxy-advanced/">Overview</a> </li> </ul> <div class="github-links"> <div class="github-links--edit"> <a href="https://github.com/Kong/docs.konghq.com/edit/main/app/_hub/kong-inc/ai-proxy-advanced/overview/_index.md" target="_blank" rel="noopener nofollow noreferrer "> <img src="/assets/images/icons/third-party/logo-github-white.svg" alt="github-edit-page">Edit this page </a> </div> <div class="github-links--issues"> <a href="https://github.com/Kong/docs.konghq.com/issues/" target="_blank" rel="noopener nofollow noreferrer "> <img src="/assets/images/icons/documentation/icn-monitoring-white.svg" alt="report-issue">Report an issue</a> </div> </div> </div> <div class="page-header--info"> <div class="page-header--info-icon"> <img src="/assets/images/icons/hub/kong-inc_ai-proxy-advanced.png" alt="header icon"> </div> <div 
class="page-header--info-meta"> <div class="meta--title"> <h1 id="main" tabindex="-1">AI Proxy Advanced</h1> <div> <div class="versions-dropdown dropdown"> <button class="dropdown-button" id="version-dropdown" type="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> Gateway Version 3.9.x <em>(latest)</em> <span class="caret"></span> </button> <ul class="dropdown-menu dropdown-menu-right" id="version-list" role="menu" aria-labelledby="version-dropdown"> <li class=""> <a href="/hub/kong-inc/ai-proxy-advanced/unreleased/"> unreleased </a> </li> <li class="active"> <a href="/hub/kong-inc/ai-proxy-advanced/"> 3.9.x <em>(latest)</em> </a> </li> <li class=""> <a href="/hub/kong-inc/ai-proxy-advanced/3.8.x/"> 3.8.x </a> </li> </ul> </div> </div> </div> <div class="meta--content"> <div class="meta--content-title"><span>By Kong Inc.</span></div> <div class="meta--content-badges"> <a href="https://konghq.com/pricing" target="_blank" class="badge konnect" aria-label="available in Konnect" rel="noopener nofollow noreferrer "> </a> <a href="https://konghq.com/pricing" target="_blank" class="badge enterprise" aria-label="available with Kong Gateway Enterprise subscription" rel="noopener nofollow noreferrer "> </a> </div> </div> </div> </div> </header> <aside class="docs-sidebar"> <i class="fa fa-times close-sidebar"></i> <ul class="sidebar-container" role="tree" aria-label="Plugin's Documentation"> <li class="sidebar-item plugin-hub" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/"> <img src="/assets/images/icons/hub-layout/icn-breadcrumbs.svg" alt=""> Plugin Hub </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label" aria-expanded="false" aria-owns="id-1-introduction-subtree"> <img src="/assets/images/icons/hub-layout/icn-overview.svg" alt=""> Introduction <button class="sidebar-tree-toggle" aria-label="toggle Introduction subtree" tabindex="-1"> <i 
class="fa fa-chevron-down"></i> </button> </span> <ul class="items" id="id-1-introduction-subtree" role="group" aria-label="Introduction"> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/"> Overview </a> </span> </li> </ul> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/configuration/"> <img src="/assets/images/icons/hub-layout/icn-configuration.svg" alt=""> Configuration reference </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label" aria-expanded="false" aria-owns="id-3-using-the-plugin-subtree"> <img src="/assets/images/icons/hub-layout/icn-how-to.svg" alt=""> Using the plugin <button class="sidebar-tree-toggle" aria-label="toggle Using the plugin subtree" tabindex="-1"> <i class="fa fa-chevron-down"></i> </button> </span> <ul class="items" id="id-3-using-the-plugin-subtree" role="group" aria-label="Using the plugin"> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/basic-example/"> Basic config examples </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label" aria-expanded="false" aria-owns="id-3-2-llm-provider-integration-guides-subtree"> LLM Provider Integration Guides <button class="sidebar-tree-toggle" aria-label="toggle LLM Provider Integration Guides subtree" tabindex="-1"> <i class="fa fa-chevron-down"></i> </button> </span> <ul class="items" id="id-3-2-llm-provider-integration-guides-subtree" role="group" aria-label="LLM Provider Integration Guides"> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" 
href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/anthropic/"> Anthropic </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/azure/"> Azure </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/bedrock/"> Bedrock </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/cohere/"> Cohere </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/gemini/"> Gemini/Vertex </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/huggingface/"> Hugging Face </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/llama2/"> Llama2 </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/mistral/"> Mistral </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/openai/"> OpenAI </a> 
</span> </li> </ul> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/cloud-provider-authentication/"> Cloud Provider Authentication </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/load-balancing/"> Configure Load Balancing with AI Proxy Advanced </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/sdk-usage/"> Use Programmatic SDKs with AI Proxy Advanced </a> </span> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/how-to/streaming/"> Streaming </a> </span> </li> </ul> </li> <li class="sidebar-item" role="none"> <span role="treeitem" class="sidebar-label"> <a class="sidebar-link" tabindex="-1" href="/hub/kong-inc/ai-proxy-advanced/changelog/"> <img src="/assets/images/icons/hub-layout/icn-changelog.svg" alt=""> Changelog </a> </span> </li> <li> <a id="konnect-cta" href="https://konghq.com/products/kong-konnect/register?utm_medium=referral&utm_source=docs&utm_campaign=gateway-konnect&utm_content=ai-proxy-advanced" class="sidebar-button" target="_blank" rel="noopener nofollow noreferrer "> Try it in Konnect </a> </li> </ul> </aside> <aside class="docs-toc" id="plugin-toc"> <i class="fa fa-times close-sidebar"></i> <i class="fa fa-chevron-right collapse-toc"></i> <i class="far fa-list-alt expand-toc"></i> <div class="docs-toc-title"> <img src="/assets/images/icons/hub-layout/icn-on-this-page.svg" alt="On this page"><a href="#">On this page</a> </div> <ul> <li><a href="#how-it-works" class="active scroll-to">How it works</a></li> <li><a href="#load-balancing" 
class="scroll-to">Load balancing</a></li> <li><a href="#retry-and-fallback" class="scroll-to">Retry and fallback</a></li> <li> <a href="#request-and-response-formats" class="scroll-to">Request and response formats</a> <ul> <li><a href="#input-formats" class="scroll-to">Input formats</a></li> <li><a href="#response-formats" class="scroll-to">Response formats</a></li> </ul> </li> <li> <a href="#get-started-with-the-ai-proxy-plugin" class="scroll-to">Get started with the AI Proxy plugin</a> <ul> <li><a href="#all-ai-gateway-plugins" class="scroll-to">All AI Gateway plugins</a></li> </ul> </li> </ul> </aside> <div class="page-content-container v2" id="documentation"> <div class="page-content"> <div class="content show-anchor-links"> <blockquote> <p> <em>Looking for the plugin's configuration parameters? You can find them in the <a href="/hub/kong-inc/ai-proxy-advanced/configuration/">AI Proxy Advanced configuration reference</a> doc.</em> </p> </blockquote> <p>The AI Proxy Advanced plugin lets you transform and proxy requests to multiple AI providers and models at the same time. 
This lets you set up load balancing between targets.</p> <p>The plugin accepts requests in one of a few defined and standardized formats, translates them to the configured target format, and then transforms the response back into a standard format.</p> <p>The following table describes which providers and requests the AI Proxy Advanced plugin supports:</p> <table> <thead> <tr> <th>Provider</th> <th>Chat</th> <th>Completion</th> <th>Streaming</th> </tr> </thead> <tbody> <tr> <td>OpenAI (GPT-4, GPT-3.5)</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>OpenAI (GPT-4o and Multi-Modal)</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Cohere</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Azure</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Anthropic</td> <td>✅</td> <td>❌</td> <td>Only chat type</td> </tr> <tr> <td>Mistral (mistral.ai, OpenAI, raw, and OLLAMA formats)</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Llama2 (raw, OLLAMA, and OpenAI formats)</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Llama3 (OLLAMA and OpenAI formats)</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Amazon Bedrock</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Gemini</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> <tr> <td>Hugging Face</td> <td>✅</td> <td>✅</td> <td>✅</td> </tr> </tbody> </table> <h2 id="how-it-works">How it works</h2> <p>The AI Proxy Advanced plugin will mediate the following for you:</p> <ul> <li>Request and response formats appropriate for the configured <code class="language-plaintext highlighter-rouge">provider</code> and <code class="language-plaintext highlighter-rouge">route_type</code> </li> <li>The following service request coordinates (unless the model is self-hosted): <ul> <li>Protocol</li> <li>Host name</li> <li>Port</li> <li>Path</li> <li>HTTP method</li> </ul> </li> <li>Authentication on behalf of the Kong API consumer</li> <li>Decorating the request with parameters from the <code class="language-plaintext 
highlighter-rouge">config.options</code> block, appropriate for the chosen provider</li> <li>Recording of usage statistics of the configured LLM provider and model into your selected <a href="/hub/?category=logging">Kong log</a> plugin output</li> <li>Optionally, additionally recording all post-transformation request and response messages from users, to and from the configured LLM</li> <li>Fulfillment of requests to self-hosted models, based on select supported format transformations</li> </ul> <p>Flattening all of the provider formats allows you to standardize the manipulation of the data before and after transmission. It also allows you to provide a choice of LLMs to the Kong consumers, using consistent request and response formats, regardless of the backend provider or model.</p> <p>This plugin currently only supports REST-based full text responses.</p> <h2 id="load-balancing">Load balancing</h2> <p>This plugin supports several load-balancing algorithms, similar to those used for Kong upstreams, allowing efficient distribution of requests across different AI models. The supported algorithms include:</p> <ul> <li> <strong>Lowest-usage</strong>: The lowest-usage algorithm in AI Proxy Advanced is based on the volume of usage for each model. It balances the load by distributing requests to models with the lowest usage, measured by factors such as prompt token counts, response token counts, or other resource metrics.</li> <li> <strong>Lowest-latency</strong>: The lowest-latency algorithm is based on the response time for each model. It distributes requests to models with the lowest response time.</li> <li> <strong>Semantic</strong>: The semantic algorithm distributes requests to different models based on the similarity between the prompt in the request and the description provided in the model configuration. This allows Kong to automatically select the model that is best suited for the given domain or use case. 
This feature enhances the flexibility and efficiency of model selection, especially when dealing with a diverse range of AI providers and models.</li> <li><a href="/gateway/latest/how-kong-works/load-balancing/#round-robin">Round-robin (weighted)</a></li> <li><a href="/gateway/latest/how-kong-works/load-balancing/#consistent-hashing">Consistent-hashing (sticky-session on given header value)</a></li> </ul> <h2 id="retry-and-fallback">Retry and fallback</h2> <p>The load balancer has customizable retries and timeouts for requests, and can redirect a request to a different model in case of failure. This allows you to have a fallback in case one of your targets is unavailable.</p> <p>This plugin does not support fallback over targets with different formats. You can use different providers as long as the formats are compatible. For example, load balancers with these combinations of targets are supported:</p> <ul> <li>Different OpenAI models</li> <li>OpenAI models and Mistral models with the OpenAI format</li> <li>Mistral models with the OLLAMA format and Llama models with the OLLAMA format</li> </ul> <blockquote class="note"> <p>Some errors, such as client errors, result in a failure and don’t fail over to another target.</p> </blockquote> <h2 id="request-and-response-formats">Request and response formats</h2> <p>The plugin’s <a href="/hub/kong-inc/ai-proxy-advanced/configuration/#config-targets-route_type"><code class="language-plaintext highlighter-rouge">config.route_type</code></a> should be set based on the target upstream endpoint and model, based on this capability matrix:</p> <table> <thead> <tr> <th>Provider Name</th> <th>Provider Upstream Path</th> <th>Kong <code class="language-plaintext highlighter-rouge">route_type</code> </th> <th>Example Model Name</th> </tr> </thead> <tbody> <tr> <td>OpenAI</td> <td><code class="language-plaintext 
highlighter-rouge">llm/v1/chat</code></td> <td>gpt-4</td> </tr> <tr> <td>OpenAI</td> <td><code class="language-plaintext highlighter-rouge">/v1/completions</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>gpt-3.5-turbo-instruct</td> </tr> <tr> <td>Cohere</td> <td><code class="language-plaintext highlighter-rouge">/v1/chat</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td>command</td> </tr> <tr> <td>Cohere</td> <td><code class="language-plaintext highlighter-rouge">/v1/generate</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>command</td> </tr> <tr> <td>Azure</td> <td><code class="language-plaintext highlighter-rouge">/openai/deployments/{deployment_name}/chat/completions</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td>gpt-4</td> </tr> <tr> <td>Azure</td> <td><code class="language-plaintext highlighter-rouge">/openai/deployments/{deployment_name}/completions</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>gpt-3.5-turbo-instruct</td> </tr> <tr> <td>Anthropic</td> <td><code class="language-plaintext highlighter-rouge">/v1/messages</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td>claude-2.1</td> </tr> <tr> <td>Anthropic</td> <td><code class="language-plaintext highlighter-rouge">/v1/complete</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>claude-2.1</td> </tr> <tr> <td>Llama2</td> <td>User-defined</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td>User-defined</td> </tr> <tr> <td>Llama2</td> <td>User-defined</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>User-defined</td> </tr> <tr> <td>Mistral</td> <td>User-defined</td> <td><code 
class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td>User-defined</td> </tr> <tr> <td>Mistral</td> <td>User-defined</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td>User-defined</td> </tr> <tr> <td>Amazon Bedrock</td> <td>Use the LLM <code class="language-plaintext highlighter-rouge">chat</code> upstream path</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td><a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html" target="_blank" rel="noopener nofollow noreferrer ">Use the model name for the specific LLM provider</a></td> </tr> <tr> <td>Amazon Bedrock</td> <td>Use the LLM <code class="language-plaintext highlighter-rouge">completions</code> upstream path</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td><a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html" target="_blank" rel="noopener nofollow noreferrer ">Use the model name for the specific LLM provider</a></td> </tr> <tr> <td>Gemini</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> <td> <code class="language-plaintext highlighter-rouge">gemini-1.5-flash</code> or <code class="language-plaintext highlighter-rouge">gemini-1.5-pro</code> </td> </tr> <tr> <td>Gemini</td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td> <code class="language-plaintext highlighter-rouge">gemini-1.5-flash</code> or <code class="language-plaintext highlighter-rouge">gemini-1.5-pro</code> </td> </tr> <tr> <td>Hugging Face</td> <td><code class="language-plaintext highlighter-rouge">/models/{model_provider}/{model_name}</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/chat</code></td> 
<td><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending" target="_blank" rel="noopener nofollow noreferrer ">Use the model name for the specific LLM provider</a></td> </tr> <tr> <td>Hugging Face</td> <td><code class="language-plaintext highlighter-rouge">/models/{model_provider}/{model_name}</code></td> <td><code class="language-plaintext highlighter-rouge">llm/v1/completions</code></td> <td><a href="https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending" target="_blank" rel="noopener nofollow noreferrer ">Use the model name for the specific LLM provider</a></td> </tr> </tbody> </table> <p>The following upstream URL patterns are used:</p> <table> <thead> <tr> <th>Provider</th> <th>URL</th> </tr> </thead> <tbody> <tr> <td>OpenAI</td> <td><code class="language-plaintext highlighter-rouge">https://api.openai.com:443/{route_type_path}</code></td> </tr> <tr> <td>Cohere</td> <td><code class="language-plaintext highlighter-rouge">https://api.cohere.com:443/{route_type_path}</code></td> </tr> <tr> <td>Azure</td> <td><code class="language-plaintext highlighter-rouge">https://{azure_instance}.openai.azure.com:443/openai/deployments/{deployment_name}/{route_type_path}</code></td> </tr> <tr> <td>Anthropic</td> <td><code class="language-plaintext highlighter-rouge">https://api.anthropic.com:443/{route_type_path}</code></td> </tr> <tr> <td>Llama2</td> <td>As defined in <code class="language-plaintext highlighter-rouge">config.model.options.upstream_url</code> </td> </tr> <tr> <td>Mistral</td> <td>As defined in <code class="language-plaintext highlighter-rouge">config.model.options.upstream_url</code> </td> </tr> <tr> <td>Amazon Bedrock</td> <td><code class="language-plaintext highlighter-rouge">https://bedrock-runtime.{region}.amazonaws.com</code></td> </tr> <tr> <td>Gemini</td> <td><code class="language-plaintext highlighter-rouge">https://generativelanguage.googleapis.com</code></td> </tr> <tr> 
<td>Hugging Face</td> <td><code class="language-plaintext highlighter-rouge">https://api-inference.huggingface.co</code></td> </tr> </tbody> </table> <blockquote class="important"> <p>While only the <strong>Llama2</strong> and <strong>Mistral</strong> models are classed as self-hosted, the target URL can be overridden for any of the supported providers. For example, a self-hosted or otherwise OpenAI-compatible endpoint can be called by setting the same <a href="/hub/kong-inc/ai-proxy-advanced/configuration/#config-embeddings-model-options-upstream_url"><code class="language-plaintext highlighter-rouge">config.model.options.upstream_url</code></a> plugin option.</p> </blockquote> <h3 id="input-formats">Input formats</h3> <p>Kong will mediate the request and response format based on the selected <a href="/hub/kong-inc/ai-proxy-advanced/configuration/#config-embeddings-model-provider"><code class="language-plaintext highlighter-rouge">config.provider</code></a> and <a href="/hub/kong-inc/ai-proxy-advanced/configuration/#config-targets-route_type"><code class="language-plaintext highlighter-rouge">config.route_type</code></a>, as outlined in the table above.</p> <p>The Kong AI Proxy accepts the following input formats, standardized across all providers; the <code class="language-plaintext highlighter-rouge">config.route_type</code> must be configured to match the required request and response format, as shown in the following examples:</p> <div class="navtabs "> <div class="navtab-titles" role="tablist"> <div data-slug="llmv1chat" data-navtab-id="navtab-61112465-9e51-44cc-9ff8-66a533c21cd7-0" class="navtab-title" role="tab" aria-controls="navtab-id-0" tabindex="0"> llm/v1/chat </div> <div data-slug="llmv1completions" data-navtab-id="navtab-61112465-9e51-44cc-9ff8-66a533c21cd7-1" class="navtab-title" role="tab" aria-controls="navtab-id-1" tabindex="0"> llm/v1/completions </div> </div> <div class="navtab-contents"> <div data-panel="llmv1chat"
data-navtab-content="navtab-61112465-9e51-44cc-9ff8-66a533c21cd7-0" class="navtab-content" role="tabpanel" id="navtab-id-0" tabindex="0" aria-labelledby="navtab-id-0"> <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">{</span><span class="w"> </span><span class="nl">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"system"</span><span class="p">,</span><span class="w"> </span><span class="nl">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"You are a scientist."</span><span class="w"> </span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"user"</span><span class="p">,</span><span class="w"> </span><span class="nl">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"What is the theory of relativity?"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">]</span><span class="w"> </span><span class="p">}</span><span class="w"> </span></code></pre></div></div> <p>With Amazon Bedrock, you can include your <a href="https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html" target="_blank" rel="noopener nofollow noreferrer ">guardrail</a> configuration in the request:</p> <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">{</span><span class="w"> </span><span class="nl">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"role"</span><span class="p">:</span><span 
class="w"> </span><span class="s2">"system"</span><span class="p">,</span><span class="w"> </span><span class="nl">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"You are a scientist."</span><span class="w"> </span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"user"</span><span class="p">,</span><span class="w"> </span><span class="nl">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"What is the theory of relativity?"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">],</span><span class="w"> </span><span class="nl">"extra_body"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"guardrailConfig"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"guardrailIdentifier"</span><span class="p">:</span><span class="s2">"&lt;guardrail_identifier&gt;"</span><span class="p">,</span><span class="w"> </span><span class="nl">"guardrailVersion"</span><span class="p">:</span><span class="s2">"1"</span><span class="p">,</span><span class="w"> </span><span class="nl">"trace"</span><span class="p">:</span><span class="s2">"enabled"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span><span class="w"> </span></code></pre></div></div> </div> <div data-panel="llmv1completions" data-navtab-content="navtab-61112465-9e51-44cc-9ff8-66a533c21cd7-1" class="navtab-content" role="tabpanel" id="navtab-id-1" tabindex="0" aria-labelledby="navtab-id-1"> <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">{</span><span class="w"> </span><span
class="nl">"prompt"</span><span class="p">:</span><span class="w"> </span><span class="s2">"You are a scientist. What is the theory of relativity?"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span></code></pre></div></div> </div> </div> </div> <h3 id="response-formats">Response formats</h3> <p>Conversely, the response formats are also transformed to a standard format across all providers:</p> <div class="navtabs "> <div class="navtab-titles" role="tablist"> <div data-slug="llmv1chat" data-navtab-id="navtab-ca093809-a743-4d9d-9886-bb3de2982dee-0" class="navtab-title" role="tab" aria-controls="navtab-id-0" tabindex="0"> llm/v1/chat </div> <div data-slug="llmv1completions" data-navtab-id="navtab-ca093809-a743-4d9d-9886-bb3de2982dee-1" class="navtab-title" role="tab" aria-controls="navtab-id-1" tabindex="0"> llm/v1/completions </div> </div> <div class="navtab-contents"> <div data-panel="llmv1chat" data-navtab-content="navtab-ca093809-a743-4d9d-9886-bb3de2982dee-0" class="navtab-content" role="tabpanel" id="navtab-id-0" tabindex="0" aria-labelledby="navtab-id-0"> <div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">{</span><span class="w"> </span><span class="nl">"choices"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"finish_reason"</span><span class="p">:</span><span class="w"> </span><span class="s2">"stop"</span><span class="p">,</span><span class="w"> </span><span class="nl">"index"</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="nl">"message"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"content"</span><span class="p">:</span><span class="w"> </span><span class="s2">"The theory of relativity is 
a..."</span><span class="p">,</span><span class="w"> </span><span class="nl">"role"</span><span class="p">:</span><span class="w"> </span><span class="s2">"assistant"</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">],</span><span class="w"> </span><span class="nl">"created"</span><span class="p">:</span><span class="w"> </span><span class="mi">1707769597</span><span class="p">,</span><span class="w"> </span><span class="nl">"id"</span><span class="p">:</span><span class="w"> </span><span class="s2">"chatcmpl-ID"</span><span class="p">,</span><span class="w"> </span><span class="nl">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"gpt-4-0613"</span><span class="p">,</span><span class="w"> </span><span class="nl">"object"</span><span class="p">:</span><span class="w"> </span><span class="s2">"chat.completion"</span><span class="p">,</span><span class="w"> </span><span class="nl">"usage"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"completion_tokens"</span><span class="p">:</span><span class="w"> </span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="nl">"prompt_tokens"</span><span class="p">:</span><span class="w"> </span><span class="mi">26</span><span class="p">,</span><span class="w"> </span><span class="nl">"total_tokens"</span><span class="p">:</span><span class="w"> </span><span class="mi">31</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span><span class="w"> </span></code></pre></div></div> </div> <div data-panel="llmv1completions" data-navtab-content="navtab-ca093809-a743-4d9d-9886-bb3de2982dee-1" class="navtab-content" role="tabpanel" id="navtab-id-1" tabindex="0" aria-labelledby="navtab-id-1"> <div class="language-json highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code><span class="p">{</span><span class="w"> </span><span class="nl">"choices"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"finish_reason"</span><span class="p">:</span><span class="w"> </span><span class="s2">"stop"</span><span class="p">,</span><span class="w"> </span><span class="nl">"index"</span><span class="p">:</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="nl">"text"</span><span class="p">:</span><span class="w"> </span><span class="s2">"The theory of relativity is a..."</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">],</span><span class="w"> </span><span class="nl">"created"</span><span class="p">:</span><span class="w"> </span><span class="mi">1707769597</span><span class="p">,</span><span class="w"> </span><span class="nl">"id"</span><span class="p">:</span><span class="w"> </span><span class="s2">"cmpl-ID"</span><span class="p">,</span><span class="w"> </span><span class="nl">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"gpt-3.5-turbo-instruct"</span><span class="p">,</span><span class="w"> </span><span class="nl">"object"</span><span class="p">:</span><span class="w"> </span><span class="s2">"text_completion"</span><span class="p">,</span><span class="w"> </span><span class="nl">"usage"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="nl">"completion_tokens"</span><span class="p">:</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="nl">"prompt_tokens"</span><span class="p">:</span><span class="w"> </span><span class="mi">7</span><span class="p">,</span><span class="w"> </span><span class="nl">"total_tokens"</span><span 
class="p">:</span><span class="w"> </span><span class="mi">17</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="p">}</span><span class="w"> </span></code></pre></div></div> </div> </div> </div> <p>The request and response formats are loosely based on OpenAI. See the <a href="https://github.com/kong/kong/blob/master/spec/fixtures/ai-proxy/oas.yaml" target="_blank" rel="noopener nofollow noreferrer ">sample OpenAPI specification</a> for more detail on the supported formats.</p> <h2 id="get-started-with-the-ai-proxy-plugin">Get started with the AI Proxy plugin</h2> <ul> <li><a href="/hub/kong-inc/ai-proxy-advanced/configuration/">Configuration reference</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/basic-example/">Basic configuration example</a></li> <li>Learn how to use the plugin with different providers: <ul> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/openai/">OpenAI</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/cohere/">Cohere</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/azure/">Azure</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/anthropic/">Anthropic</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/mistral/">Mistral</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/llama2/">Llama2</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/gemini/">Gemini/Vertex</a></li> <li><a href="/hub/kong-inc/ai-proxy-advanced/how-to/llm-provider-integration-guides/bedrock/">Amazon Bedrock</a></li> </ul> </li> </ul> <h3 id="all-ai-gateway-plugins">All AI Gateway plugins</h3> <ul> <li><a href="/hub/kong-inc/ai-proxy/">AI Proxy</a></li> <li> <a href="/hub/kong-inc/ai-proxy-advanced/">AI Proxy Advanced</a> <span class="badge 
enterprise"></span> </li> <li><a href="/hub/kong-inc/ai-request-transformer/">AI Request Transformer</a></li> <li><a href="/hub/kong-inc/ai-response-transformer/">AI Response Transformer</a></li> <li> <a href="/hub/kong-inc/ai-semantic-cache/">AI Semantic Cache</a> <span class="badge enterprise"></span> </li> <li> <a href="/hub/kong-inc/ai-semantic-prompt-guard/">AI Semantic Prompt Guard</a> <span class="badge enterprise"></span> </li> <li> <a href="/hub/kong-inc/ai-rate-limiting-advanced/">AI Rate Limiting Advanced</a> <span class="badge enterprise"></span> </li> <li> <a href="/hub/kong-inc/ai-azure-content-safety/">AI Azure Content Safety</a> <span class="badge enterprise"></span> </li> <li><a href="/hub/kong-inc/ai-prompt-template/">AI Prompt Template</a></li> <li><a href="/hub/kong-inc/ai-prompt-guard/">AI Prompt Guard</a></li> <li><a href="/hub/kong-inc/ai-prompt-decorator/">AI Prompt Decorator</a></li> </ul> <div class="book-nav-container"> <hr> <div class="book-nav next"> <span class="direction">Next</span> <a href="/hub/kong-inc/ai-proxy-advanced/configuration/">AI Proxy Advanced Configuration</a> </div> </div> </div> </div> </div> </div> <div id="scroll-to-top-button"> <i class="fas fa-chevron-up"></i> </div> <div class="feedback-widget-container"> <input id="feedback-widget-checkbox" type="checkbox"> <label for="feedback-widget-checkbox"> <img src="/assets/images/icons/feedback-widget.svg" alt="Feedback widget"> </label> <div class="feedback-container"> <div class="feedback-thankyou"> Thank you for your feedback. 
</div> <div class="feedback-comment"> <textarea id="feedback-comment-text" rows="3" placeholder="Please let us know what we can improve on this page..."></textarea> <div class="feedback-comment-buttons"> <button id="feedback-comment-button-back">Back</button> <button id="feedback-comment-button-submit" class="button-primary">Submit</button> </div> </div> <div class="feedback-options"> <div class="feedback-options-title">Was this page useful?</div> <div class="feedback-options-buttons"> <i data-feedback-result="yes" class="feedback-options-button far fa-thumbs-up"></i> <i data-feedback-result="no" class="feedback-options-button far fa-thumbs-down"></i> </div> </div> </div> </div> </div> <div id="image-modal" data-image-expand-disabled=""> <div class="image-modal-backdrop"></div> <div class="image-container"> <img src="" alt=""> <i class="fa fa-times"></i> </div> </div> <footer class="marketing-footer--light-gray"> <section> <ul class="newsletter"> <li class="logo-wrapper"> <div class="logo"> <img src="/assets/images/logos/konglogo-light-theme-primary.svg" alt="Kong"> </div> <div class="footer-title">Powering the API world</div> <p> Increase developer productivity, security, and performance at scale with the unified platform for API management, service mesh, and ingress controller. 
</p> <div class="footer-form-container"> <form id="subscribe-form" method="POST" action="/assets/javascripts/subscribe.js"> <input required id="subscribe-input" type="email" name="email" placeholder="Email" aria-required="true" aria-invalid="false"> <input id="footer-form-button" type="submit" form="subscribe-form" value="Subscribe"> </form> <div id="form-response"></div> </div> </li> <li class="footer-columns"> <ul class="footer-columns-product-list"> <li> <nav> <div class="footer-category">Products</div> <ul> <li> <a href="https://konghq.com/products/kong-konnect" target="_blank" rel="noopener nofollow noreferrer ">Kong Konnect</a> </li> <li> <a href="https://konghq.com/products/kong-enterprise" target="_blank" rel="noopener nofollow noreferrer ">Kong Gateway Enterprise</a> </li> <li> <a href="https://konghq.com/products/kong-gateway" target="_blank" rel="noopener nofollow noreferrer ">Kong Gateway</a> </li> <li> <a href="https://konghq.com/products/kong-mesh" target="_blank" rel="noopener nofollow noreferrer ">Kong Mesh</a> </li> <li> <a href="https://konghq.com/products/kong-ingress-controller" target="_blank" rel="noopener nofollow noreferrer ">Kong Ingress Controller</a> </li> <li> <a href="https://insomnia.rest/" target="_blank" rel="noopener nofollow noreferrer noopener nofollow noreferrer">Kong Insomnia</a> </li> <li> <a href="https://konghq.com/product-updates" target="_blank" rel="noopener nofollow noreferrer ">Product Updates</a> </li> <li> <a href="https://konghq.com/contact-sales" target="_blank" rel="noopener nofollow noreferrer ">Get Started</a> </li> </ul> </nav> </li> <li> <nav> <div class="footer-category">Documentation</div> <ul> <li> <a href="/konnect/">Kong Konnect Docs</a> </li> <li> <a href="/gateway/latest/">Kong Gateway Docs</a> </li> <li> <a href="/gateway/latest/kong-enterprise/">Kong Gateway Enterprise Docs</a> </li> <li> <a href="/mesh/latest/">Kong Mesh Docs</a> </li> <li> <a href="https://docs.insomnia.rest/" target="_blank" 
rel="noopener nofollow noreferrer noopener nofollow noreferrer">Kong Insomnia Docs</a> </li> <li> <a href="/hub/">Kong Konnect Plugin Hub</a> </li> </ul> </nav> </li> <li> <nav> <div class="footer-category">Open Source</div> <ul> <li> <a href="https://konghq.com/install/#kong-community" target="_blank" rel="noopener nofollow noreferrer ">Kong Gateway</a> </li> <li> <a href="https://kuma.io/" target="_blank" rel="noopener nofollow noreferrer noopener nofollow noreferrer">Kuma</a> </li> <li> <a href="https://insomnia.rest/" target="_blank" rel="noopener nofollow noreferrer noopener nofollow noreferrer">Insomnia</a> </li> <li> <a href="https://konghq.com/community" target="_blank" rel="noopener nofollow noreferrer ">Kong Community</a> </li> </ul> </nav> </li> <li> <nav> <div class="footer-category">Company</div> <ul> <li> <a href="https://konghq.com/company/about-us" target="_blank" rel="noopener nofollow noreferrer ">About Kong</a> </li> <li> <a href="https://konghq.com/customers" target="_blank" rel="noopener nofollow noreferrer ">Customers</a> </li> <li> <a href="https://konghq.com/company/careers" target="_blank" rel="noopener nofollow noreferrer ">Careers</a> </li> <li> <a href="https://konghq.com/press-room" target="_blank" rel="noopener nofollow noreferrer ">Press</a> </li> <li> <a href="https://konghq.com/events" target="_blank" rel="noopener nofollow noreferrer ">Events</a> </li> <li> <a href="https://konghq.com/company/contact-us" target="_blank" rel="noopener nofollow noreferrer ">Contact</a> </li> </ul> </nav> </li> </ul> </li> </ul> </section> <section class="legal"> <div class="container d-flex"> <div class="social"> <div class="social-link"> <a href="https://www.facebook.com/konghq/" title="Facebook" target="_blank" rel="noopener nofollow noreferrer "><i aria-label="Facebook" class="fa fa-facebook-official" aria-hidden="true"></i></a> </div> <div class="social-link"> <a href="https://twitter.com/thekonginc" title="Twitter" target="_blank" rel="noopener 
nofollow noreferrer "><i aria-label="Twitter" class="fa fa-twitter" aria-hidden="true"></i></a> </div> <div class="social-link"> <a href="https://www.meetup.com/topics/kong/all/" title="Meetup" target="_blank" rel="noopener nofollow noreferrer "><i aria-label="Meetup" class="fa fa-meetup" aria-hidden="true"></i></a> </div> <div class="social-link"> <a href="https://linkedin.com/company/278819" title="LinkedIn" target="_blank" rel="noopener nofollow noreferrer "><i aria-label="LinkedIn" class="fa fa-linkedin" aria-hidden="true"></i></a> </div> <div class="social-link"> <a href="https://github.com/kong/kong" target="_blank" class="btn-gh" title="GitHub" rel="noopener nofollow noreferrer "> <i class="fa fa-github" aria-hidden="true" aria-label="GitHub"></i> </a> </div> </div> <ul> <li> <span class="mashape-footer-content"> <a href="https://konghq.com/legal/terms-of-use" target="_blank" rel="noopener nofollow noreferrer ">Terms</a><b>•</b> <a href="https://konghq.com/legal/privacy-policy" target="_blank" rel="noopener nofollow noreferrer ">Privacy</a><b>•</b> <a href="https://konghq.com/compliance" target="_blank" rel="noopener nofollow noreferrer ">Trust and Compliance</a> </span> </li> </ul> <div> <span>© Kong Inc. 
2025 </span> </div> </div> </section> </footer>

<!--
  Inline page scripts.
  All JavaScript comments below are block comments on purpose: a line comment
  would swallow the code that follows it if this file's whitespace is ever
  collapsed onto a single line.
-->

<!-- Heading permalinks -->
<script>
  /* Builds the permalink <a> element for the heading with the given id. */
  var anchorForId = function (id) {
    var anchor = document.createElement("a");
    anchor.className = "header-link";
    anchor.href = "#" + id;
    anchor.innerHTML = "<i class=\"fa fa-link\"></i>";
    anchor.title = `${id} Permalink`;
    return anchor;
  };

  /* When the document reaches readyState "complete", prepend a permalink
     anchor to every h1-h6 inside a .show-anchor-links container that has a
     non-empty id. */
  document.onreadystatechange = function () {
    if (this.readyState === "complete") {
      var className =
        ".show-anchor-links h1, .show-anchor-links h2, .show-anchor-links h3, " +
        ".show-anchor-links h4, .show-anchor-links h5, .show-anchor-links h6";
      var headers = document.querySelectorAll(className);
      for (var i = 0; i < headers.length; i++) {
        var header = headers[i];
        if (typeof header.id !== "undefined" && header.id !== "") {
          header.prepend(anchorForId(header.id));
        }
      }
    }
  };
</script>

<!-- Segment analytics loader (SNIPPET_VERSION 5.2.0); minified snippet kept as provided -->
<script>
  !function(){var i="analytics",analytics=window[i]=window[i]||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","debug","page","screen","once","off","on","addSourceMiddleware","addIntegrationMiddleware","setAnonymousId","addDestinationMiddleware","register"];analytics.factory=function(e){return function(){if(window[i].initialized)return window[i][e].apply(window[i],arguments);var n=Array.prototype.slice.call(arguments);if(["track","screen","alias","group","page","identify"].indexOf(e)>-1){var c=document.querySelector("link[rel='canonical']");n.push({__t:"bpc",c:c&&c.getAttribute("href")||void 0,p:location.pathname,u:location.href,s:location.search,t:document.title,r:document.referrer})}n.unshift(e);analytics.push(n);return analytics}};for(var n=0;n<analytics.methods.length;n++){var key=analytics.methods[n];analytics[key]=analytics.factory(key)}analytics.load=function(key,n){var t=document.createElement("script");t.type="text/javascript";t.async=!0;t.setAttribute("data-global-segment-analytics-key",i);t.src="https://cdn.segment.com/analytics.js/v1/" + key + "/analytics.min.js";var r=document.getElementsByTagName("script")[0];r.parentNode.insertBefore(t,r);analytics._loadOptions=n};analytics._writeKey="X7EZTdbdUKQ8M6x42SHHPWiEhjsfs1EQ";analytics.SNIPPET_VERSION="5.2.0";
  analytics.load("X7EZTdbdUKQ8M6x42SHHPWiEhjsfs1EQ");
  analytics.page();
  }}();
</script>

<div id="fb-root"></div>
<script id="github-bjs" src="https://buttons.github.io/buttons.js" async defer></script>

<!-- Visual Website Optimizer (VWO) loader -->
<script type="text/javascript">
  var _vwo_code = (function() {
    var account_id = 125292,
        settings_tolerance = 2000,
        library_tolerance = 2500,
        use_existing_jquery = true,
        /* DO NOT EDIT BELOW THIS LINE */
        f = false,
        d = document;
    return {
      use_existing_jquery: function() {
        return use_existing_jquery;
      },
      library_tolerance: function() {
        return library_tolerance;
      },
      /* Marks loading as finished (once) and removes the
         #_vis_opt_path_hides style element if it is present. */
      finish: function() {
        if (!f) {
          f = true;
          var a = d.getElementById('_vis_opt_path_hides');
          if (a) a.parentNode.removeChild(a);
        }
      },
      finished: function() {
        return f;
      },
      /* Appends a script element for URL `a` to <head>; a load error calls finish(). */
      load: function(a) {
        var b = d.createElement('script');
        b.src = a;
        b.type = 'text/javascript';
        b.onerror = function() {
          _vwo_code.finish();
        };
        d.getElementsByTagName('head')[0].appendChild(b);
      },
      /* Starts the finish() fallback timer, requests the VWO settings script,
         injects the (currently empty) #_vis_opt_path_hides style element, and
         returns the timer id. */
      init: function() {
        /* NOTE(review): settings_timer is assigned without a declaration, so it
           becomes a global; presumably the VWO library reads it. Confirm before
           turning it into a local. */
        settings_timer = setTimeout(function() {
          /* Function callback instead of a code string: same effect, no implicit eval. */
          _vwo_code.finish();
        }, settings_tolerance);
        this.load(
          'https://dev.visualwebsiteoptimizer.com/j.php?a=' + account_id +
          '&u=' + encodeURIComponent(d.URL) +
          '&r=' + Math.random()
        );
        var a = d.createElement('style'),
            b = '',
            h = d.getElementsByTagName('head')[0];
        a.setAttribute('id', '_vis_opt_path_hides');
        a.setAttribute('type', 'text/css');
        if (a.styleSheet) a.styleSheet.cssText = b;
        else a.appendChild(d.createTextNode(b));
        h.appendChild(a);
        return settings_timer;
      }
    };
  })();
  _vwo_settings_timer = _vwo_code.init();
</script>

<!-- Algolia DocSearch -->
<script src="https://cdn.jsdelivr.net/npm/@docsearch/js@3"></script>
<script type="text/javascript">
  docsearch({
    appId: '05Y6TLHNFZ',
    apiKey: '80483bfe28d9fd036a11a6f6a06454f8',
    indexName: 'konghq',
    container: '#getkong-algolia-search-input',
    disableUserPersonalization: true,
    placeholder: 'Search the docs...',
    /* Rewrite each hit URL onto the current protocol and host so that results
       also navigate correctly in a local environment. Hits whose URL does not
       contain the production host are returned unchanged instead of being
       turned into "<host>undefined". */
    transformItems(items) {
      var productionHost = 'docs.konghq.com';
      return items.map((item) => {
        var hostIndex = item.url.indexOf(productionHost);
        if (hostIndex === -1) {
          return item;
        }
        var path = item.url.slice(hostIndex + productionHost.length);
        var modifiedUrl = window.location.protocol + '//' + window.location.host + path;
        return { ...item, url: modifiedUrl };
      });
    },
    translations: {
      button: {
        buttonText: 'Search the docs...',
        buttonAriaLabel: 'Search the docs...'
      }
    },
    /* Footer link to the full search page, carrying the current query plus the
       version and product facets. */
    resultsFooterComponent({ state }) {
      var facetParameters = {"version[0]":"latest","product[0]":"Plugin Hub"};
      var queryParams = new URLSearchParams(facetParameters);
      queryParams.set('query', state.query);
      return {
        /* The HTML `tag` */
        type: 'a',
        ref: undefined,
        constructor: undefined,
        key: state.query,
        /* Its props */
        props: {
          href: `/search/?${queryParams.toString()}`,
          target: '_blank',
          /* Raw text rendered in the HTML element */
          children: 'See more >'
        },
        __v: null,
      };
    },
    searchParameters: {
      optionalFilters: ['product:Kong Konnect<score=1>', 'product:Kong Gateway<score=2>', 'product:Plugin Hub<score=3>'],
      facetFilters: ['version:latest']
    }
  });
</script>
</body>
</html>