<!-- CINXE.COM -->
<!-- Inference Endpoints by Hugging Face -->
<!doctype html> <html lang="en"> <head> <meta charset="utf-8" /> <!-- Fonts --> <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:ital,wght@0,200;0,300;0,400;0,600;0,700;0,900;1,200;1,300;1,400;1,600;1,700;1,900&display=swap" rel="stylesheet" /> <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap" rel="stylesheet" /> <!-- Favicons--> <link rel="apple-touch-icon" sizes="180x180" href="/favicons/apple-touch-icon.png" /> <link rel="icon" type="image/png" sizes="32x32" href="/favicons/favicon-32x32.png" /> <link rel="icon" type="image/png" sizes="16x16" href="/favicons/favicon-16x16.png" /> <link rel="manifest" href="/favicons/site.webmanifest" /> <link rel="mask-icon" href="/favicons/safari-pinned-tab.svg" color="#5bbad5" /> <link rel="shortcut icon" href="/favicons/favicon.ico" /> <meta name="msapplication-TileColor" content="#00aba9" /> <meta name="msapplication-config" content="/favicons/browserconfig.xml" /> <meta name="theme-color" content="#ffffff" /> <!-- Viewport --> <meta name="viewport" content="width=device-width, initial-scale=1" /> <link href="/_app/immutable/assets/0.B192vDLE.css" rel="stylesheet"><title>Inference Endpoints by Hugging Face</title><!-- HEAD_svelte-1w2qwih_START --><meta name="description" content="Deploy any AI model from the Hugging Face Hub in minutes."><!-- HTML_TAG_START --><script type="application/ld+json">{ "@context": "https://schema.org/", "@type": "Product", "name": "Inference Endpoints by Hugging Face", "description": "Deploy popular AI models in minutes using Inference Endpoint.", "review": [ { "@type": "Review", "name": "Inference Endpoint review", "author": { "@type": "Person", "name": "Bryce Harlan" }, "positiveNotes": { "@type": "ItemList", "itemListElement": [ { "@type": "ListItem", "position": 1, "name": "It took off a week's worth of developer time." 
} ] } } ] }</script><!-- HTML_TAG_END --><!-- HEAD_svelte-1w2qwih_END -->
</head>
<body>
<div style="display: contents">
<div class="relative min-h-screen w-full min-w-full max-w-full overflow-x-hidden"><!-- Site header: home link with logo, a menu button shown only below the md breakpoint, and a link list shown from md upwards. --><header class="mx-auto flex h-12 w-full border-b bg-gradient-to-b from-white to-gray-50"><div class="mx-auto flex w-full max-w-7xl items-center justify-between px-3 sm:px-6 2xl:max-w-screen-2xl"><div class="flex flex-1 items-center"><a href="/" class="flex items-center" data-sveltekit-preload-data><img src="/logo-endpoints.svg" alt="" class="mr-3 h-5"> <div class="text-[1.1rem] font-semibold" data-svelte-h="svelte-dpaup1"><span class="hidden lg:inline">Inference</span> Endpoints</div></a></div>
<!-- The button below contains only an aria-hidden icon, so aria-label supplies its accessible name. NOTE(review): "Menu" is inferred from the three-bar icon; confirm the wording against the component. -->
<div class="relative md:hidden order-1 ml-4">
<button class="!py-1.5 !px-1.5 !rounded-md !border-gray-300 !text-base btn w-full text-sm cursor-pointer" type="button" aria-label="Menu">
<svg class=" !mr-0" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path fill="currentColor" d="M3 4h18v2H3V4Zm0 7h18v2H3v-2Zm0 7h18v2H3v-2Z"></path></svg>
</button>
</div>
<ul class="mr-1 hidden items-center space-x-5 text-smd md:flex">
<li><a class="group flex max-w-32 items-center overflow-hidden hover:text-blue-700" data-sveltekit-preload-data href="https://huggingface.co/docs/inference-endpoints" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" class="flex-none text-base mr-1.5 group-hover:text-blue-700" width="1em" height="1em" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="1.7"><path stroke-linecap="round" stroke-linejoin="round" d="M12 6.253v13m0-13C10.832 5.477 9.246 5 7.5 5S4.168 5.477 3 6.253v13C4.168 18.477 5.754 18 7.5 18s3.332.477 4.5 1.253m0-13C13.168 5.477 14.754 5 16.5 5c1.747 0 3.332.477 4.5 1.253v13C19.832 18.477 18.247 18 16.5 18c-1.746 0-3.332.477-4.5 1.253"></path></svg> Docs</a></li>
<li><a class="group 
flex max-w-32 items-center overflow-hidden hover:text-blue-700" data-sveltekit-preload-data href="mailto:api-enterprise@huggingface.co?subject=HF%20Inference%20Endpoints%3A%20%3CISSUE%3E&body=Hello%2C%0A%0AUsername%3A%20%3CUSERNAME%3E%0AEndpoint%20owner%3A%20%3CUSER%20OR%20ORG%20NAME%3E%0AEndpoint%20name%3A%20%3CINFERENCE-ENDPOINT-NAME%3E%0ASecurity%20level%3A%20%3C%22PUBLIC%22%20%7C%20%22PROTECTED%22%20%7C%20%22PRIVATE%22%3E%0AInstance%20type%3A%20%3CPROVIDER%3E%20%3CCPU%20%7C%20GPU%3E%20%3CINSTANCE%20DETAILS%3E%0ARepository%20id%3A%20%3CHF-REPOSITORY%3E%0AIs%20the%20deployed%20model%20up-to-date%3F%20%3CYES%20%7C%20NO%3E%0AHave%20you%20checked%20the%20logs%3F%20%3CYES%20%7C%20NO%3E%0AHave%20you%20checked%20the%20documentation%3F%20%3CYES%20%7C%20NO%3E%0A%0ADescribe%20your%20issue%3A%0A%0A" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" class="flex-none text-base mr-1.5 group-hover:text-blue-700" width="1em" height="1em" fill="currentColor" stroke="currentColor" viewBox="0 0 32 32"><path d="M16 21C14.8449 21.001 13.7075 20.7158 12.6896 20.1697C11.6717 19.6237 10.805 18.8339 10.167 17.871L11.833 16.764C12.2891 17.4517 12.9083 18.0159 13.6353 18.4062C14.3624 18.7964 15.1748 19.0007 16 19.0007C16.8252 19.0007 17.6376 18.7964 18.3647 18.4062C19.0917 18.0159 19.7109 17.4517 20.167 16.764L21.833 17.871C21.195 18.8339 20.3283 19.6237 19.3104 20.1697C18.2925 20.7158 17.1551 21.001 16 21ZM20 10C19.6044 10 19.2178 10.1173 18.8889 10.3371C18.56 10.5568 18.3036 10.8692 18.1522 11.2346C18.0009 11.6001 17.9613 12.0022 18.0384 12.3902C18.1156 12.7782 18.3061 13.1345 18.5858 13.4142C18.8655 13.6939 19.2219 13.8844 19.6098 13.9616C19.9978 14.0388 20.3999 13.9991 20.7654 13.8478C21.1308 13.6964 21.4432 13.44 21.6629 13.1112C21.8827 12.7823 22 12.3956 22 12C22.0027 11.7366 21.9528 11.4754 21.8532 11.2315C21.7536 10.9876 21.6064 10.7661 21.4202 10.5798C21.2339 10.3936 21.0124 10.2464 20.7685 10.1468C20.5247 10.0472 20.2634 9.99734 20 10ZM12 10C11.6044 10 11.2178 10.1173 
10.8889 10.3371C10.56 10.5568 10.3036 10.8692 10.1522 11.2346C10.0009 11.6001 9.96126 12.0022 10.0384 12.3902C10.1156 12.7782 10.3061 13.1345 10.5858 13.4142C10.8655 13.6939 11.2219 13.8844 11.6098 13.9616C11.9978 14.0388 12.3999 13.9991 12.7654 13.8478C13.1308 13.6964 13.4432 13.44 13.6629 13.1112C13.8827 12.7823 14 12.3956 14 12C14.0027 11.7366 13.9528 11.4754 13.8532 11.2315C13.7536 10.9876 13.6064 10.7661 13.4202 10.5798C13.2339 10.3936 13.0124 10.2464 12.7685 10.1468C12.5247 10.0472 12.2634 9.99734 12 10Z" stroke-width="0.2"></path><path d="M17.736 32L16 31L20 24H26C26.2628 24.0004 26.523 23.9489 26.7658 23.8486C27.0087 23.7482 27.2293 23.6009 27.4151 23.4151C27.6009 23.2293 27.7482 23.0087 27.8486 22.7658C27.9489 22.523 28.0004 22.2628 28 22V8C28.0004 7.73725 27.9489 7.477 27.8486 7.23417C27.7482 6.99134 27.6009 6.7707 27.4151 6.58491C27.2293 6.39911 27.0087 6.25181 26.7658 6.15144C26.523 6.05107 26.2628 5.9996 26 6H6C5.73725 5.9996 5.477 6.05107 5.23417 6.15144C4.99134 6.25181 4.7707 6.39911 4.58491 6.58491C4.39911 6.7707 4.25181 6.99134 4.15144 7.23417C4.05107 7.477 3.9996 7.73725 4 8V22C3.9996 22.2628 4.05107 22.523 4.15144 22.7658C4.25181 23.0087 4.39911 23.2293 4.58491 23.4151C4.7707 23.6009 4.99134 23.7482 5.23417 23.8486C5.477 23.9489 5.73725 24.0004 6 24H15V26H6C4.93913 26 3.92172 25.5786 3.17157 24.8284C2.42143 24.0783 2 23.0609 2 22V8C2 6.93913 2.42143 5.92172 3.17157 5.17157C3.92172 4.42143 4.93913 4 6 4H26C27.0609 4 28.0783 4.42143 28.8284 5.17157C29.5786 5.92172 30 6.93913 30 8V22C30 23.0609 29.5786 24.0783 28.8284 24.8284C28.0783 25.5786 27.0609 26 26 26H21.165L17.736 32Z" stroke-width="0.2"></path></svg> Support</a></li></ul> <button class="ml-5 rounded-full border border-transparent border-t-transparent bg-blue-600 px-3 py-1 text-smd leading-tight text-white hover:border-blue-600 hover:bg-blue-50 hover:text-blue-700 hover:shadow" type="button" data-svelte-h="svelte-cgpgar">Log In</button></div></header> <main class="min-h-[calc(100vh-3rem)] 
bg-fixed bg-gradient-to-tr from-purple-100 via-white to-blue-100"><div class="mx-auto w-full max-w-7xl px-3 sm:px-6 2xl:max-w-screen-2xl pb-32 pt-6 space-y-16 pb-16 md:space-y-20 lg:space-y-32 !pt-8 md:!pt-16 lg:!pt-24 2xl:pt-32"><!-- Hero: headline, Log In / Learn More actions, sign-up prompt and product screenshot. --><section class="flex flex-col items-center justify-around space-y-12 lg:flex-row lg:space-x-16 lg:space-y-0"><div class="max-w-xl space-y-7"><div data-svelte-h="svelte-t5lsvw"><h2 class="text-4xl font-bold">Machine Learning At Your Service</h2>
<a class="-mb-1.5 mt-0.5 flex items-center" href="https://huggingface.co" target="_blank"><span class="mr-1">by</span> <img alt="" class="mr-1 h-7" src="/logo-huggingface.svg"> <span class="font-bold">Hugging Face</span></a></div>
<p class="text-xl text-gray-500" data-svelte-h="svelte-1nbu6ni">Easily deploy Transformers, Diffusers or any model on dedicated, fully managed infrastructure. Keep your costs low with our secure, compliant and flexible production solution.</p>
<div class="flex items-center space-x-4 font-semibold"><button class="rounded-md border border-blue-600 bg-blue-600 px-5 py-1.5 text-white hover:-translate-y-0.5 hover:bg-blue-700 hover:shadow-md" type="button" data-svelte-h="svelte-1jmri5k">Log In</button> <a class="rounded-md border border-blue-600 bg-transparent px-5 py-1.5 text-blue-600 hover:-translate-y-0.5 hover:shadow-md" href="https://huggingface.co/docs/inference-endpoints" target="_blank" data-svelte-h="svelte-18la9zj">Learn More</a></div>
<p class="text-sm text-gray-500">No Hugging Face account?
<a class="underline hover:text-gray-800" href="https://huggingface.co/join?next=https%3A%2F%2Fendpoints.huggingface.co%2F">Sign up</a>!</p></div>
<div class="shadow-alternate-xl max-w-xl rounded-lg border border-gray-100" data-svelte-h="svelte-1uk8zb2"><img class="h-auto w-full rounded-lg" src="/landing/screenshot.png" alt=""></div></section>
<!-- Catalog teaser: section heading followed by a grid of ready-to-deploy model cards. An id must be unique within the document, so this heading carries a section-specific id. -->
<section class="flex flex-col items-center "><div class="w-full max-w-xl lg:max-w-full"><header class="mb-8"><h2 id="one-click-deployment" class="mb-2 text-3xl font-bold ">One-click inference deployment</h2>
<p class="text-lg text-gray-500 ">Import your favorite model from the Hugging Face hub or browse our catalog of hand-picked, ready-to-deploy models!</p></header>
<div class="mb-8 grid grid-cols-1 gap-6 lg:grid-cols-3">
<!-- Model card: links to /new with repository, vendor, region, accelerator, instance and task passed as query parameters. Ampersands are written as &amp; so that "&region" cannot be read as the start of a character reference. -->
<a href="/new?repository=google%2Fgemma-2-27b-it&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-l4-x4&amp;task=text-generation&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><div class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/google/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 group-hover/wrapper:max-w-full">google /</div> <h4 class="truncate">gemma-2-27b-it</h4></div></div>
<div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Text Generation</div></div>
<div class="group/accelerated relative flex flex-none items-center"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
focusable="false" role="img" class="mr-1" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 36 36"><path fill="currentColor" d="M10.52 34h-3a1 1 0 0 1-.88-1.44L12.55 21H6a1 1 0 0 1-.85-1.54l10.68-17a1 1 0 0 1 .81-.46h13.43a1 1 0 0 1 .77 1.69L21.78 14h5.38a1 1 0 0 1 .73 1.66l-16.63 18a1 1 0 0 1-.74.34m-1.34-2h.91l14.77-16h-5.27a1 1 0 0 1-.77-1.69L27.88 4H17.19L7.77 19h6.43a1 1 0 0 1 .88 1.44Z" class="clr-i-outline clr-i-outline-path-1"></path><path fill="none" d="M0 0h36v36H0z"></path></svg> TGI <div class="pointer-events-none absolute top-full translate-y-2 transform rounded bg-gradient-to-t from-gray-900 to-slate-700 px-3 py-2 text-left font-normal leading-tight text-white shadow transition-opacity left-1/2 -translate-x-1/2 opacity-0 group-hover/accelerated:opacity-100 !w-48 text-sm"><div class="absolute bottom-full h-0 w-0 transform border-4 border-t-0 border-slate-700 left-1/2 -translate-x-1/2 " style="border-left-color: transparent; border-right-color: transparent; "></div> Accelerated Text Generation Inference</div></div> </div> <div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 4x Nvidia L4"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 4x Nvidia L4</span></div> <div class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 3.8 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 3.8</span></div></div></a> <a 
href="/new?repository=meta-llama%2FLlama-3.1-70B-Instruct&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-l40s-x4&amp;task=text-generation&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><!-- Model card for meta-llama/Llama-3.1-70B-Instruct. The link passes the deployment presets as query parameters; ampersands are written as &amp; and the query string ends at "true". --><div class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/meta-llama/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 group-hover/wrapper:max-w-full">meta-llama /</div> <h4 class="truncate">Llama-3.1-70B-Instruct</h4></div></div>
<div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Text Generation</div></div>
<div class="group/accelerated relative flex flex-none items-center"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" class="mr-1" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 36 36"><path fill="currentColor" d="M10.52 34h-3a1 1 0 0 1-.88-1.44L12.55 21H6a1 1 0 0 1-.85-1.54l10.68-17a1 1 0 0 1 .81-.46h13.43a1 1 0 0 1 .77 1.69L21.78 14h5.38a1 1 0 0 1 .73 1.66l-16.63 18a1 1 0 0 1-.74.34m-1.34-2h.91l14.77-16h-5.27a1 1 0 0 1-.77-1.69L27.88 4H17.19L7.77 19h6.43a1 1 0 0 1 .88 1.44Z" class="clr-i-outline clr-i-outline-path-1"></path><path fill="none" d="M0 0h36v36H0z"></path></svg> TGI <div class="pointer-events-none absolute top-full translate-y-2 transform rounded bg-gradient-to-t from-gray-900 to-slate-700 px-3 py-2 text-left font-normal leading-tight text-white shadow transition-opacity left-1/2 
-translate-x-1/2 opacity-0 group-hover/accelerated:opacity-100 !w-48 text-sm"><div class="absolute bottom-full h-0 w-0 transform border-4 border-t-0 border-slate-700 left-1/2 -translate-x-1/2 " style="border-left-color: transparent; border-right-color: transparent; "></div> Accelerated Text Generation Inference</div></div>
</div>
<div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 4x Nvidia L40S"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 4x Nvidia L40S</span></div>
<div class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 8.3 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 8.3</span></div></div></a>
<!-- Model card for Qwen/Qwen2.5-Coder-7B-Instruct; same link conventions as the card above. -->
<a href="/new?repository=Qwen%2FQwen2.5-Coder-7B-Instruct&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-l40s-x1&amp;task=text-generation&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><div class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/Qwen/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 
group-hover/wrapper:max-w-full">Qwen /</div> <h4 class="truncate">Qwen2.5-Coder-7B-Instruct</h4></div></div> <div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Text Generation</div></div> <div class="group/accelerated relative flex flex-none items-center"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" class="mr-1" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 36 36"><path fill="currentColor" d="M10.52 34h-3a1 1 0 0 1-.88-1.44L12.55 21H6a1 1 0 0 1-.85-1.54l10.68-17a1 1 0 0 1 .81-.46h13.43a1 1 0 0 1 .77 1.69L21.78 14h5.38a1 1 0 0 1 .73 1.66l-16.63 18a1 1 0 0 1-.74.34m-1.34-2h.91l14.77-16h-5.27a1 1 0 0 1-.77-1.69L27.88 4H17.19L7.77 19h6.43a1 1 0 0 1 .88 1.44Z" class="clr-i-outline clr-i-outline-path-1"></path><path fill="none" d="M0 0h36v36H0z"></path></svg> TGI <div class="pointer-events-none absolute top-full translate-y-2 transform rounded bg-gradient-to-t from-gray-900 to-slate-700 px-3 py-2 text-left font-normal leading-tight text-white shadow transition-opacity left-1/2 -translate-x-1/2 opacity-0 group-hover/accelerated:opacity-100 !w-48 text-sm"><div class="absolute bottom-full h-0 w-0 transform border-4 border-t-0 border-slate-700 left-1/2 -translate-x-1/2 " style="border-left-color: transparent; border-right-color: transparent; "></div> Accelerated Text Generation Inference</div></div> </div> <div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 1x Nvidia L40S"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 1x Nvidia 
L40S</span></div>
<div class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 1.8 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 1.8</span></div></div></a>
<!-- Model card for black-forest-labs/FLUX.1-schnell. The link passes the deployment presets as query parameters; ampersands are written as &amp; and the query string ends at "true". -->
<a href="/new?repository=black-forest-labs%2FFLUX.1-schnell&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-l40s-x1&amp;task=text-to-image&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><div class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/black-forest-labs/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 group-hover/wrapper:max-w-full">black-forest-labs /</div> <h4 class="truncate">FLUX.1-schnell</h4></div></div>
<div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Text-to-Image</div></div> </div>
<div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 1x Nvidia L40S"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 1x Nvidia L40S</span></div>
<div 
class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 1.8 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 1.8</span></div></div></a>
<!-- Model card for mixedbread-ai/mxbai-embed-large-v1; same link conventions as the card above. -->
<a href="/new?repository=mixedbread-ai%2Fmxbai-embed-large-v1&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-l4-x1&amp;task=sentence-embeddings&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><div class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/mixedbread-ai/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 group-hover/wrapper:max-w-full">mixedbread-ai /</div> <h4 class="truncate">mxbai-embed-large-v1</h4></div></div>
<div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Sentence Embeddings</div></div>
<div class="group/accelerated relative flex flex-none items-center"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" class="mr-1" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 36 36"><path fill="currentColor" d="M10.52 34h-3a1 1 0 0 1-.88-1.44L12.55 21H6a1 1 0 0 1-.85-1.54l10.68-17a1 1 0 0 1 .81-.46h13.43a1 1 0 0 1 .77 1.69L21.78 14h5.38a1 1 0 0 1 .73 1.66l-16.63 18a1 1 0 0 
1-.74.34m-1.34-2h.91l14.77-16h-5.27a1 1 0 0 1-.77-1.69L27.88 4H17.19L7.77 19h6.43a1 1 0 0 1 .88 1.44Z" class="clr-i-outline clr-i-outline-path-1"></path><path fill="none" d="M0 0h36v36H0z"></path></svg> TEI <div class="pointer-events-none absolute top-full translate-y-2 transform rounded bg-gradient-to-t from-gray-900 to-slate-700 px-3 py-2 text-left font-normal leading-tight text-white shadow transition-opacity left-1/2 -translate-x-1/2 opacity-0 group-hover/accelerated:opacity-100 !w-48 text-sm"><div class="absolute bottom-full h-0 w-0 transform border-4 border-t-0 border-slate-700 left-1/2 -translate-x-1/2 " style="border-left-color: transparent; border-right-color: transparent; "></div> Accelerated Text Embeddings Inference</div></div>
</div>
<div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 1x Nvidia L4"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 1x Nvidia L4</span></div>
<div class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 0.8 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 0.8</span></div></div></a>
<!-- Model card for openai/whisper-large-v3-turbo; same link conventions as the card above. -->
<a href="/new?repository=openai%2Fwhisper-large-v3-turbo&amp;vendor=aws&amp;region=us-east-1&amp;accelerator=gpu&amp;instance_id=aws-us-east-1-nvidia-t4-x1&amp;task=automatic-speech-recognition&amp;no_suggested_compute=true" class="border-gray-150 group/wrapper flex flex-col rounded-xl border bg-white px-5 pt-3 transition-all hover:-translate-y-1 hover:shadow-md !border-gray-100 shadow-lg"><div 
class="w-full flex-1"><div class="inline-flex max-w-full items-center overflow-hidden py-0.5 text-lg font-semibold lowercase leading-tight"><img alt="Author avatar" class="rounded-full shadow !shadow-none !rounded flex-none -mb-[1px] w-4 h-4 mr-2.5" src="https://huggingface.co/api/organizations/openai/avatar"> <div class="mr-0.5 max-w-0 flex-none overflow-hidden whitespace-nowrap font-normal transition-all delay-100 group-hover/wrapper:max-w-full">openai /</div> <h4 class="truncate">whisper-large-v3-turbo</h4></div></div> <div class="mt-1 flex items-center gap-x-2 gap-y-1 text-sm leading-normal text-gray-400"><div class="flex items-center overflow-hidden"> <div class="truncate">Automatic Speech Recognition</div></div> </div> <div class="-mx-5 mt-2.5 flex items-center overflow-hidden whitespace-nowrap rounded-b-xl border-t bg-gradient-to-r from-blue-50 via-white to-violet-50 p-2.5"><div class="border-gray-150 mr-2 flex items-center overflow-hidden rounded-full border bg-white px-2.5 py-1 text-smd leading-tight text-gray-500 shadow-sm" title="GPU 1x Nvidia T4"><span class="truncate"><span class="mr-0.5 font-semibold uppercase text-gray-400">GPU</span> 1x Nvidia T4</span></div> <div class="border-gray-150 ml-auto flex items-center overflow-hidden rounded-full border bg-gradient-to-tr from-blue-500 via-blue-500 to-blue-500 px-2.5 py-1 text-smd leading-tight text-gray-500 text-white shadow-sm group-hover/wrapper:to-purple-500" title="$ 0.5 / hour when running"><span class="truncate"><strong class="mr-0.5" data-svelte-h="svelte-1t6f49e">$</strong> 0.5</span></div></div></a></div> <div class="flex items-center space-x-4 font-semibold"><a class="flex items-center rounded-full border border-blue-600 px-4 py-2 font-semibold text-blue-600 hover:-translate-y-0.5 hover:shadow-md" href="/catalog"><svg xmlns="http://www.w3.org/2000/svg" class="mr-1.5" width="1em" height="1em" viewBox="0 0 32 32" fill="none" stroke="currentColor"><path d="M28 23.9871V18.6185C28 18.1074 27.7282 
17.6349 27.2863 17.3779L21.9999 14.3033V8.10464C21.9999 7.59479 21.7294 7.12321 21.2893 6.8658L16.7244 4.1963C16.2769 3.93457 15.723 3.93457 15.2755 4.1963L10.7106 6.8658C10.2705 7.12318 9.99999 7.59479 9.99999 8.10464V14.2842L4.71068 17.3773C4.27052 17.6347 4 18.1063 4 18.6162V23.9871C4 24.4994 4.27305 24.9728 4.71645 25.2294L9.2813 27.8704C9.7259 28.1276 10.2741 28.1276 10.7187 27.8704L15.9999 24.8149L21.2812 27.8704C21.7258 28.1276 22.274 28.1276 22.7186 27.8704L27.2834 25.2294C27.7268 24.9728 28 24.4994 28 23.9871Z" stroke-linejoin="round" stroke-width="2.5"></path><path d="M18.9999 16.0389L16.7186 17.3588C16.274 17.616 15.7259 17.616 15.2813 17.3588L13 16.0389M15.9999 17.7929V21.3039" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5"></path></svg> Browse Catalog</a>
<a class="flex items-center rounded-full border border-blue-600 px-4 py-2 font-semibold text-blue-600 hover:-translate-y-0.5 hover:shadow-md" href="https://huggingface.co/models" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" class="mr-2" width="1em" height="1em" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"></path></svg> Hub Models</a></div></div></section>
<!-- Customer stories: a row of company buttons followed by one story panel per company. An id must be unique within the document, so this heading carries a section-specific id. -->
<section class="flex flex-col items-center "><div class="w-full max-w-xl lg:max-w-full"><header class="mb-8"><h2 id="customer-stories" class="mb-2 text-3xl font-bold lg:text-center">Customer Stories</h2>
<p class="text-lg text-gray-500 lg:text-center">Learn how leading AI teams use Inference Endpoints to deploy their models</p></header>
<div><!-- Each button contains only a logo image, so the image alt text is the button's accessible name and must identify the company. --><nav class="mb-6 grid grid-cols-2 gap-4 lg:flex lg:justify-start lg:gap-0 lg:space-x-4"><button class="rounded-xl border border-b-2 bg-white px-5 py-2 text-center text-xl text-gray-500 border-blue-600" type="button"><div class="flex h-10 items-center justify-center"><img alt="Musixmatch's logo" class="h-8" 
src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/musixmatch.svg"></div> </button><button class="rounded-xl border border-b-2 bg-white px-5 py-2 text-center text-xl text-gray-500 border-gray-200" type="button"><div class="flex h-10 items-center justify-center"><img alt="Phamily's logo" class="h-10" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/phamily.png"></div> </button><button class="rounded-xl border border-b-2 bg-white px-5 py-2 text-center text-xl text-gray-500 border-gray-200" type="button"><div class="flex h-10 items-center justify-center"><img alt="Pinecone's logo" class="h-6" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/pinecone.svg"></div> </button><button class="rounded-xl border border-b-2 bg-white px-5 py-2 text-center text-xl text-gray-500 border-gray-200" type="button"><div class="flex h-10 items-center justify-center"><img alt="Waymark's logo" class="h-6 mt-2" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/waymark.png"></div> </button></nav>
<!-- Musixmatch story panel: video testimonial, use case, deployed models and a customer quote. -->
<div class="lg:flex items-start space-y-6 lg:space-y-0"><div class="w-full rounded-3xl border border-gray-100 bg-white p-7 shadow-lg lg:order-last"><h4 class="mb-1 text-2xl text-gray-900">Endpoints for <span class="font-bold">Music </span></h4>
<p class="text-lg text-gray-600">Musixmatch is the world’s leading music data company</p>
<div class="mt-4 w-full overflow-hidden rounded-lg border bg-transparent"><div class="aspect-h-9 aspect-w-16"><iframe class="h-full w-full" title="Musixmatch Video Testimonial" src="https://www.youtube.com/embed/aD659eVo7pQ" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></div> </div></div>
<div class="w-full space-y-6 lg:mr-6 lg:w-1/2"><div class="w-full space-y-5 rounded-3xl border border-gray-100 bg-white p-7 shadow-lg"><div><div class="font-mono font-semibold"><svg class="inline -mt-1.5" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg> Use Case</div> <p class="mt-1.5 text-gray-600">Custom text embeddings generation pipeline</p></div> <div><div class="font-mono font-semibold"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1" width="1em" height="1em" viewBox="0 0 32 32"><path fill="currentColor" d="m28.504 8.136l-12-7a1 1 0 0 0-1.008 0l-12 7A1 1 0 0 0 3 9v14a1 1 0 0 0 .496.864l12 7a1 1 0 0 0 1.008 0l12-7A1 1 0 0 0 29 23V9a1 1 0 0 0-.496-.864ZM16 3.158L26.016 9L16 14.842L5.984 9ZM5 10.74l10 5.833V28.26L5 22.426Zm12 17.52V16.574l10-5.833v11.685Z"></path></svg> Models Deployed</div> <div class="mt-1.5 text-gray-600"><ul><li class="ml-4 list-disc">Distilbert-base-uncased-finetuned-sst-2-english</li><li class="ml-4 list-disc">facebook/wav2vec2-base-960h</li><li class="ml-4 list-disc">Custom model based on sentence transformers</li> </ul></div> </div></div> <div class="w-full rounded-3xl border border-gray-100 bg-white p-7 pr-12 shadow-lg"><svg class="inline w-6 h-6 -mt-1 text-gray-600" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 30 30"><path d="M15.264 19.552C15.264 23.2 17.664 25.12 20.352 25.12C23.328 25.12 26.112 22.624 26.112 19.456C26.112 16.864 24.288 15.136 22.08 15.136C21.888 15.136 21.408 15.136 21.312 15.136C22.368 12.064 25.824 8.8 29.376 7.072L26.4 4C20.448 6.976 15.264 13.504 15.264 19.552ZM0 19.552C0 23.2 2.304 25.12 5.088 25.12C8.064 25.12 10.848 22.624 10.848 19.456C10.848 16.864 8.928 15.136 6.72 15.136C6.528 15.136 6.048 15.136 5.952 15.136C7.008 12.064 10.56 8.8 14.016 7.072L11.136 4C5.184 6.976 0 13.504 0 19.552Z" fill="currentColor"></path></svg> <blockquote class="w-96 max-w-full text-lg italic text-gray-600">The coolest thing was how easy it was to define a complete custom interface from the model to the inference process. It just took us a couple of hours to adapt our code, and have a functioning and totally custom endpoint.</blockquote> <div class="mt-4 flex items-center"><img alt="Portrait of Andrea Boscarino, Data Scientist at Musixmatch" class="mr-4 h-12 w-12 rounded-full border border-gray-100 object-cover shadow-lg" loading="lazy" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/andrea-musixmatch2.png"> <div><div class="font-bold">Andrea Boscarino</div> <div class="text-sm">Data Scientist at Musixmatch</div> </div></div> </div></div> </div><div class="hidden items-start space-y-6 lg:space-y-0"><div class="w-full rounded-3xl border border-gray-100 bg-white p-7 shadow-lg lg:order-last"><h4 class="mb-1 text-2xl text-gray-900">Endpoints for <span class="font-bold">Health </span></h4> <p class="text-lg text-gray-600">Phamily improves patient health with intelligent care management</p> <div class="mt-4 w-full overflow-hidden rounded-lg border bg-transparent"><div class="aspect-h-9 aspect-w-16"><iframe class="h-full w-full" title="Phamily Video Testimonial" src="https://www.youtube.com/embed/20C9X5OYO2Q" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; 
gyroscope; picture-in-picture" allowfullscreen></iframe></div> </div></div> <div class="w-full space-y-6 lg:mr-6 lg:w-1/2"><div class="w-full space-y-5 rounded-3xl border border-gray-100 bg-white p-7 shadow-lg"><div><div class="font-mono font-semibold"><svg class="inline -mt-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg> Use Case</div> <p class="mt-1.5 text-gray-600">HIPAA-compliant secure endpoints for text classification</p></div> <div><div class="font-mono font-semibold"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1" width="1em" height="1em" viewBox="0 0 32 32"><path fill="currentColor" d="m28.504 8.136l-12-7a1 1 0 0 0-1.008 0l-12 7A1 1 0 0 0 3 9v14a1 1 0 0 0 .496.864l12 7a1 1 0 0 0 1.008 0l12-7A1 1 0 0 0 29 23V9a1 1 0 0 0-.496-.864ZM16 3.158L26.016 9L16 14.842L5.984 9ZM5 10.74l10 5.833V28.26L5 22.426Zm12 17.52V16.574l10-5.833v11.685Z"></path></svg> Models Deployed</div> <div class="mt-1.5 text-gray-600"><ul><li class="ml-4 list-disc">Custom model based on text-classification (MPNET)</li><li class="ml-4 list-disc">Custom model based on text-classification (BERT)</li> </ul></div> </div></div> <div class="w-full rounded-3xl border border-gray-100 bg-white p-7 
pr-12 shadow-lg"><svg class="inline w-6 h-6 -mt-1 text-gray-600" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 30 30"><path d="M15.264 19.552C15.264 23.2 17.664 25.12 20.352 25.12C23.328 25.12 26.112 22.624 26.112 19.456C26.112 16.864 24.288 15.136 22.08 15.136C21.888 15.136 21.408 15.136 21.312 15.136C22.368 12.064 25.824 8.8 29.376 7.072L26.4 4C20.448 6.976 15.264 13.504 15.264 19.552ZM0 19.552C0 23.2 2.304 25.12 5.088 25.12C8.064 25.12 10.848 22.624 10.848 19.456C10.848 16.864 8.928 15.136 6.72 15.136C6.528 15.136 6.048 15.136 5.952 15.136C7.008 12.064 10.56 8.8 14.016 7.072L11.136 4C5.184 6.976 0 13.504 0 19.552Z" fill="currentColor"></path></svg> <blockquote class="w-96 max-w-full text-lg italic text-gray-600">It took off a week's worth of developer time. Thanks to Inference Endpoints, we now basically spend all of our time on R&D, not fiddling with AWS. 
If you haven't already built a robust, performant, fault tolerant system for inference, then it's pretty much a no brainer.</blockquote> <div class="mt-4 flex items-center"><img alt="Portrait of Bryce Harlan, Senior Software Engineer at Phamily" class="mr-4 h-12 w-12 rounded-full border border-gray-100 object-cover shadow-lg" loading="lazy" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/bryce-phamily.png"> <div><div class="font-bold">Bryce Harlan</div> <div class="text-sm">Senior Software Engineer at Phamily</div> </div></div> </div></div> </div><div class="hidden items-start space-y-6 lg:space-y-0"><div class="w-full rounded-3xl border border-gray-100 bg-white p-7 shadow-lg lg:order-last"><h4 class="mb-1 text-2xl text-gray-900">Endpoints for <span class="font-bold">Search </span></h4> <p class="text-lg text-gray-600">Pinecone is the vector database for intelligent search</p> <div class="mt-4 w-full overflow-hidden rounded-lg border bg-transparent"><div class="aspect-h-9 aspect-w-16"><iframe class="h-full w-full" title="Pinecone Video Testimonial" src="https://www.youtube.com/embed/Q-TKSZZfQX8" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></div> </div></div> <div class="w-full space-y-6 lg:mr-6 lg:w-1/2"><div class="w-full space-y-5 rounded-3xl border border-gray-100 bg-white p-7 shadow-lg"><div><div class="font-mono font-semibold"><svg class="inline -mt-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 
1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg> Use Case</div> <p class="mt-1.5 text-gray-600">Autoscaling endpoints for fast embeddings generation</p></div> <div><div class="font-mono font-semibold"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1" width="1em" height="1em" viewBox="0 0 32 32"><path fill="currentColor" d="m28.504 8.136l-12-7a1 1 0 0 0-1.008 0l-12 7A1 1 0 0 0 3 9v14a1 1 0 0 0 .496.864l12 7a1 1 0 0 0 1.008 0l12-7A1 1 0 0 0 29 23V9a1 1 0 0 0-.496-.864ZM16 3.158L26.016 9L16 14.842L5.984 9ZM5 10.74l10 5.833V28.26L5 22.426Zm12 17.52V16.574l10-5.833v11.685Z"></path></svg> Models Deployed</div> <div class="mt-1.5 text-gray-600"><ul><li class="ml-4 list-disc">Different sentence transformers and embedding models</li> </ul></div> </div></div> <div class="w-full rounded-3xl border border-gray-100 bg-white p-7 pr-12 shadow-lg"><svg class="inline w-6 h-6 -mt-1 text-gray-600" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 30 30"><path d="M15.264 19.552C15.264 23.2 17.664 25.12 20.352 25.12C23.328 25.12 26.112 22.624 26.112 19.456C26.112 16.864 24.288 15.136 22.08 15.136C21.888 15.136 21.408 15.136 21.312 15.136C22.368 12.064 25.824 8.8 29.376 7.072L26.4 4C20.448 6.976 15.264 13.504 15.264 19.552ZM0 19.552C0 23.2 2.304 25.12 5.088 25.12C8.064 25.12 10.848 22.624 10.848 19.456C10.848 16.864 8.928 15.136 6.72 15.136C6.528 15.136 6.048 15.136 5.952 15.136C7.008 12.064 10.56 8.8 14.016 7.072L11.136 4C5.184 6.976 0 13.504 0 19.552Z" fill="currentColor"></path></svg> <blockquote class="w-96 max-w-full 
text-lg italic text-gray-600">We were able to choose an off the shelf model that's very common for our customers to get started with and set it so that it can be configured to handle over 100 requests per second just with a few button clicks. With the release of the Hugging Face Inference Endpoints, we believe there's a new standard for how easy it can be to go build your first vector embedding based solution, whether it be semantic search or question answering system.</blockquote> <div class="mt-4 flex items-center"><img alt="Portrait of Gareth Jones, Senior Product Manager at Pinecone" class="mr-4 h-12 w-12 rounded-full border border-gray-100 object-cover shadow-lg" loading="lazy" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/gareth-pinecone-2.png"> <div><div class="font-bold">Gareth Jones</div> <div class="text-sm">Senior Product Manager at Pinecone</div> </div></div> </div></div> </div><div class="hidden items-start space-y-6 lg:space-y-0"><div class="w-full rounded-3xl border border-gray-100 bg-white p-7 shadow-lg lg:order-last"><h4 class="mb-1 text-2xl text-gray-900">Endpoints for <span class="font-bold">Videos </span></h4> <p class="text-lg text-gray-600">Waymark is an AI-powered video creator</p> <div class="mt-4 w-full overflow-hidden rounded-lg border bg-transparent"><div class="aspect-h-9 aspect-w-16"><iframe class="h-full w-full" title="Waymark Video Testimonial" src="https://www.youtube.com/embed/KonQJmDsWlo" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe></div> </div></div> <div class="w-full space-y-6 lg:mr-6 lg:w-1/2"><div class="w-full space-y-5 rounded-3xl border border-gray-100 bg-white p-7 shadow-lg"><div><div class="font-mono font-semibold"><svg class="inline -mt-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg> Use Case</div> <p class="mt-1.5 text-gray-600">Multi-modal endpoints for embeddings, audio and image generation</p></div> <div><div class="font-mono font-semibold"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1" width="1em" height="1em" viewBox="0 0 32 32"><path fill="currentColor" d="m28.504 8.136l-12-7a1 1 0 0 0-1.008 0l-12 7A1 1 0 0 0 3 9v14a1 1 0 0 0 .496.864l12 7a1 1 0 0 0 1.008 0l12-7A1 1 0 0 0 29 23V9a1 1 0 0 0-.496-.864ZM16 3.158L26.016 9L16 14.842L5.984 9ZM5 10.74l10 5.833V28.26L5 22.426Zm12 17.52V16.574l10-5.833v11.685Z"></path></svg> Models Deployed</div> <div class="mt-1.5 text-gray-600"><ul><li class="ml-4 list-disc">sentence-transformers/all-mpnet-base-v2</li><li class="ml-4 list-disc">google/vit-base-patch16-224-in21k</li><li class="ml-4 list-disc">Custom model based on florentgbelidji/blip_captioning</li> </ul></div> </div></div> <div class="w-full rounded-3xl border border-gray-100 bg-white p-7 pr-12 shadow-lg"><svg class="inline w-6 h-6 -mt-1 text-gray-600" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 30 30"><path d="M15.264 19.552C15.264 23.2 17.664 25.12 20.352 25.12C23.328 25.12 26.112 22.624 
26.112 19.456C26.112 16.864 24.288 15.136 22.08 15.136C21.888 15.136 21.408 15.136 21.312 15.136C22.368 12.064 25.824 8.8 29.376 7.072L26.4 4C20.448 6.976 15.264 13.504 15.264 19.552ZM0 19.552C0 23.2 2.304 25.12 5.088 25.12C8.064 25.12 10.848 22.624 10.848 19.456C10.848 16.864 8.928 15.136 6.72 15.136C6.528 15.136 6.048 15.136 5.952 15.136C7.008 12.064 10.56 8.8 14.016 7.072L11.136 4C5.184 6.976 0 13.504 0 19.552Z" fill="currentColor"></path></svg> <blockquote class="w-96 max-w-full text-lg italic text-gray-600">You're bringing the potential time delta between - I've never seen anything that could do this before - to - I could have it on infrastructure ready to support an existing product - down to potentially less than a day.</blockquote> <div class="mt-4 flex items-center"><img alt="Portrait of Nathan Labenz, Founder at Waymark" class="mr-4 h-12 w-12 rounded-full border border-gray-100 object-cover shadow-lg" loading="lazy" src="https://cdn-media.huggingface.co/marketing/inference-endpoints-page/nathan-waymark-2.png"> <div><div class="font-bold">Nathan Labenz</div> <div class="text-sm">Founder at Waymark</div> </div></div> </div></div> </div></div></div></section> <section class="flex flex-col items-center "><div class="w-full max-w-xl lg:max-w-full"><header class="mb-8"><h2 id="pricing" class="mb-2 text-3xl font-bold lg:text-center">Pricing</h2> <p class="text-lg text-gray-500 lg:text-center">Choose a plan that fits your needs</p></header> <div class="mx-auto items-stretch justify-center space-y-6 lg:flex lg:w-5/6 lg:space-x-6 lg:space-y-0"><div class="flex w-full flex-col overflow-hidden rounded-3xl border border-gray-100 bg-white shadow-lg"><div class="border-b px-7 pb-5 pt-6"><div class="flex items-center"><svg class="mr-2 text-2xl text-gray-600" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M22.5 13c-4.7 0-8.5 
3.8-8.5 8.5s3.8 8.5 8.5 8.5s8.5-3.8 8.5-8.5s-3.8-8.5-8.5-8.5m6.5 8h-3c0-2-.3-4-.9-5.5c2.1 1 3.7 3 3.9 5.5m-6.5 7c-.4-.2-1.3-1.8-1.5-5h2.9c-.2 3.2-1 4.8-1.4 5M21 21c.1-3.8 1.1-5.8 1.4-6c.4.2 1.4 2.2 1.5 6zm-1.1-5.5c-.6 1.5-.8 3.5-.9 5.5h-3c.2-2.5 1.8-4.5 3.9-5.5M16.2 23H19c.1 1.6.4 3.2.9 4.5c-1.8-.8-3.2-2.5-3.7-4.5m8.9 4.5c.5-1.3.8-2.8.9-4.5h2.9c-.6 2-2 3.7-3.8 4.5"></path><path d="M25.8 10c-.9-4.6-5-8-9.8-8c-4.8 0-8.9 3.4-9.8 8.1c-3.5.7-6.2 3.7-6.2 7.4C0 21.6 3.4 25 7.5 25H11v-2H7.5c-3 0-5.5-2.5-5.5-5.5c0-2.9 2.2-5.3 5.1-5.5H8v-.9c.5-4 3.9-7.1 8-7.1c3.7 0 6.8 2.6 7.7 6z"></path></svg> <h4 class="text-xl font-semibold text-gray-900">Self-Serve </h4></div> <p class="text-gray-400">Pay as you go when using Inference Endpoints </p></div> <div class="flex flex-1 flex-col items-start rounded-b-xl border-blue-50 bg-white bg-gradient-to-t from-gray-50 to-white px-7 pb-6 pt-7 transition-shadow duration-500 hover:shadow-xl lg:rounded-none"><ul class="mb-7 space-y-1.5"><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Pay for what you use, per minute</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Starting as low as $0.06/hour</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Billed monthly</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Email support</div> </li></ul> <a class="focus:shadow-outline mt-auto inline-block rounded-full border border-blue-500 bg-blue-500 px-4 py-1.5 text-center text-white hover:bg-blue-600 focus:outline-none" href="https://huggingface.co/pricing#endpoints" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1 mr-1" width="1em" height="1em" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"></path></svg> See Pricing </a></div> </div><div class="flex w-full flex-col overflow-hidden rounded-3xl border border-gray-100 bg-white shadow-lg"><div class="border-b px-7 pb-5 pt-6"><div class="flex items-center"><svg class="mr-2 text-2xl text-gray-600" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M25.798 10a10 10 0 0 0-19.62.124A7.496 7.496 0 0 0 7.5 25H8v-2h-.5a5.496 5.496 0 0 1-.377-10.98l.837-.057l.09-.833A7.993 7.993 0 0 1 23.737 10Z"></path><path d="M28 12H18a2.002 2.002 0 0 0-2 2v4h-4a2.002 2.002 0 0 0-2 2v10h20V14a2.002 2.002 0 0 0-2-2M12 28v-8h4v8Zm16 
0H18V14h10Z"></path><path d="M20 16h2v4h-2zm4 0h2v4h-2zm-4 6h2v4h-2zm4 0h2v4h-2z"></path></svg> <h4 class="text-xl font-semibold text-gray-900">Enterprise </h4></div> <p class="text-gray-400">Get a custom quote and premium support </p></div> <div class="flex flex-1 flex-col items-start rounded-b-xl border-blue-50 bg-white bg-gradient-to-t from-gray-50 to-white px-7 pb-6 pt-7 transition-shadow duration-500 hover:shadow-xl lg:rounded-none"><ul class="mb-7 space-y-1.5"><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Lower marginal costs based on volume</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Uptime guarantees</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Custom annual contracts</div> </li><li class="flex space-x-2"><svg class="mt-1 flex-none fill-current text-gray-400/70" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" 
width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M13 24l-9-9l1.414-1.414L13 21.171L26.586 7.586L28 9L13 24z" fill="currentColor"></path></svg> <div>Dedicated support, SLAs</div> </li></ul> <a class="focus:shadow-outline mt-auto inline-block rounded-full border border-blue-500 bg-blue-500 px-4 py-1.5 text-center text-white hover:bg-blue-600 focus:outline-none" href="https://huggingface.co/inference-endpoints/enterprise" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" class="inline -mt-1 mr-1" width="1em" height="1em" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"></path></svg> Request a Quote </a></div> </div></div></div></section></div></main></div> <script> { __sveltekit_hgg46r = { base: "" }; const element = document.currentScript.parentElement; const data = [{"type":"data","data":{accessToken:void 0,account:void 0,accounts:[],adminMode:false,canAddPaymentMethod:false,canPay:false,isHFemployee:false,user:void 0,vendors:[{name:"aws",status:"available",regions:[{name:"us-east-1",label:"N. 
Virginia",status:"available",computes:[{id:"aws-us-east-1-intel-icl-x1",accelerator:"cpu",numAccelerators:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x1",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.032,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-icl-x2",accelerator:"cpu",numAccelerators:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x2",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.064,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-icl-x4",accelerator:"cpu",numAccelerators:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x4",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.128,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-icl-x8",accelerator:"cpu",numAccelerators:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x8",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.256,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-spr-x1",accelerator:"cpu",numAccelerators:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x1",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.033,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-spr-x2",accelerator:"cpu",numAccelerators:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x2",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.067,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-spr-x4",accelerator:"cpu",numAccelerators:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x4",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.134,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-spr-x8",accelerator:"cpu",numAccelerators:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x8",architecture:"Intel 
Sapphire Rapids",status:"available",pricePerHour:.268,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-intel-spr-x16",accelerator:"cpu",numAccelerators:16,memoryGb:32,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x16",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.536,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-t4-x1",accelerator:"gpu",numAccelerators:1,numCpus:3,memoryGb:15,gpuMemoryGb:16,instanceType:"nvidia-t4",instanceSize:"x1",architecture:"Nvidia T4",status:"available",pricePerHour:.5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-t4-x4",accelerator:"gpu",numAccelerators:4,numCpus:46,memoryGb:192,gpuMemoryGb:64,instanceType:"nvidia-t4",instanceSize:"x4",architecture:"Nvidia T4",status:"available",pricePerHour:3,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a10g-x1",accelerator:"gpu",numAccelerators:1,numCpus:6,memoryGb:30,gpuMemoryGb:24,instanceType:"nvidia-a10g",instanceSize:"x1",architecture:"Nvidia A10G",status:"available",pricePerHour:1,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a10g-x4",accelerator:"gpu",numAccelerators:4,numCpus:46,memoryGb:186,gpuMemoryGb:96,instanceType:"nvidia-a10g",instanceSize:"x4",architecture:"Nvidia A10G",status:"available",pricePerHour:5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a100-x1",accelerator:"gpu",numAccelerators:1,numCpus:11,memoryGb:145,gpuMemoryGb:80,instanceType:"nvidia-a100",instanceSize:"x1",architecture:"Nvidia A100",status:"available",pricePerHour:4,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a100-x2",accelerator:"gpu",numAccelerators:2,numCpus:22,memoryGb:290,gpuMemoryGb:160,instanceType:"nvidia-a100",instanceSize:"x2",architecture:"Nvidia 
A100",status:"available",pricePerHour:8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a100-x4",accelerator:"gpu",numAccelerators:4,numCpus:44,memoryGb:580,gpuMemoryGb:320,instanceType:"nvidia-a100",instanceSize:"x4",architecture:"Nvidia A100",status:"available",pricePerHour:16,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-a100-x8",accelerator:"gpu",numAccelerators:8,numCpus:88,memoryGb:1160,gpuMemoryGb:640,instanceType:"nvidia-a100",instanceSize:"x8",architecture:"Nvidia A100",status:"available",pricePerHour:32,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-l4-x4",accelerator:"gpu",numAccelerators:4,numCpus:47,memoryGb:185,gpuMemoryGb:96,instanceType:"nvidia-l4",instanceSize:"x4",architecture:"Nvidia L4",status:"available",pricePerHour:3.8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-l4-x1",accelerator:"gpu",numAccelerators:1,numCpus:7,memoryGb:30,gpuMemoryGb:24,instanceType:"nvidia-l4",instanceSize:"x1",architecture:"Nvidia L4",status:"available",pricePerHour:.8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-l40s-x1",accelerator:"gpu",numAccelerators:1,numCpus:7,memoryGb:30,gpuMemoryGb:48,instanceType:"nvidia-l40s",instanceSize:"x1",architecture:"Nvidia L40S",status:"available",pricePerHour:1.8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-l40s-x4",accelerator:"gpu",numAccelerators:4,numCpus:47,memoryGb:380,gpuMemoryGb:192,instanceType:"nvidia-l40s",instanceSize:"x4",architecture:"Nvidia L40S",status:"available",pricePerHour:8.3,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-nvidia-l40s-x8",accelerator:"gpu",numAccelerators:8,numCpus:190,memoryGb:1532,gpuMemoryGb:384,instanceType:"nvidia-l40s",instanceSize:"x8",architecture:"Nvidia 
L40S",status:"available",pricePerHour:23.5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-inf2-x1",accelerator:"neuron",numAccelerators:1,numCpus:3,memoryGb:14.5,gpuMemoryGb:32,instanceType:"inf2",instanceSize:"x1",architecture:"AWS Inferentia 2",status:"available",pricePerHour:.75,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-us-east-1-inf2-x12",accelerator:"neuron",numAccelerators:12,numCpus:190,memoryGb:760,gpuMemoryGb:384,instanceType:"inf2",instanceSize:"x12",architecture:"AWS Inferentia 2",status:"available",pricePerHour:12,quota:{maxAccelerators:0,usedAccelerators:0}}]},{name:"eu-west-1",label:"Ireland",status:"available",computes:[{id:"aws-eu-west-1-intel-icl-x1",accelerator:"cpu",numAccelerators:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x1",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.032,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-icl-x2",accelerator:"cpu",numAccelerators:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x2",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.064,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-icl-x4",accelerator:"cpu",numAccelerators:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x4",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.128,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-icl-x8",accelerator:"cpu",numAccelerators:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-icl",instanceSize:"x8",architecture:"Intel Ice Lake",status:"deprecated",pricePerHour:.256,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-spr-x1",accelerator:"cpu",numAccelerators:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x1",architecture:"Intel Sapphire 
Rapids",status:"available",pricePerHour:.033,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-spr-x2",accelerator:"cpu",numAccelerators:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x2",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.067,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-spr-x4",accelerator:"cpu",numAccelerators:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x4",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.134,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-spr-x8",accelerator:"cpu",numAccelerators:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x8",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.268,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-intel-spr-x16",accelerator:"cpu",numAccelerators:16,memoryGb:32,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x16",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.536,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-t4-x1",accelerator:"gpu",numAccelerators:1,numCpus:3,memoryGb:15,gpuMemoryGb:16,instanceType:"nvidia-t4",instanceSize:"x1",architecture:"Nvidia T4",status:"available",pricePerHour:.5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-t4-x4",accelerator:"gpu",numAccelerators:4,numCpus:46,memoryGb:192,gpuMemoryGb:64,instanceType:"nvidia-t4",instanceSize:"x4",architecture:"Nvidia T4",status:"not_available",pricePerHour:3,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a10g-x1",accelerator:"gpu",numAccelerators:1,numCpus:6,memoryGb:30,gpuMemoryGb:24,instanceType:"nvidia-a10g",instanceSize:"x1",architecture:"Nvidia 
A10G",status:"available",pricePerHour:1,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a10g-x4",accelerator:"gpu",numAccelerators:4,numCpus:46,memoryGb:186,gpuMemoryGb:96,instanceType:"nvidia-a10g",instanceSize:"x4",architecture:"Nvidia A10G",status:"not_available",pricePerHour:5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a100-x1",accelerator:"gpu",numAccelerators:1,numCpus:11,memoryGb:145,gpuMemoryGb:80,instanceType:"nvidia-a100",instanceSize:"x1",architecture:"Nvidia A100",status:"not_available",pricePerHour:4,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a100-x2",accelerator:"gpu",numAccelerators:2,numCpus:22,memoryGb:290,gpuMemoryGb:160,instanceType:"nvidia-a100",instanceSize:"x2",architecture:"Nvidia A100",status:"not_available",pricePerHour:8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a100-x4",accelerator:"gpu",numAccelerators:4,numCpus:44,memoryGb:580,gpuMemoryGb:320,instanceType:"nvidia-a100",instanceSize:"x4",architecture:"Nvidia A100",status:"not_available",pricePerHour:16,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"aws-eu-west-1-nvidia-a100-x8",accelerator:"gpu",numAccelerators:8,numCpus:88,memoryGb:1160,gpuMemoryGb:640,instanceType:"nvidia-a100",instanceSize:"x8",architecture:"Nvidia A100",status:"not_available",pricePerHour:32,quota:{maxAccelerators:0,usedAccelerators:0}}]},{name:"eu-central-1",label:"Frankfurt",status:"not_available",computes:[]},{name:"us-west-2",label:"Oregon",status:"not_available",computes:[]},{name:"ap-east-1",label:"Hong Kong",status:"not_available",computes:[]}]},{name:"azure",status:"available",regions:[{name:"eastus",label:"East US",status:"available",computes:[{id:"azure-eastus-intel-xeon-x1",accelerator:"cpu",numAccelerators:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-xeon",instanceSize:"x1",architecture:"Intel 
Xeon",status:"available",pricePerHour:.06,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"azure-eastus-intel-xeon-x2",accelerator:"cpu",numAccelerators:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-xeon",instanceSize:"x2",architecture:"Intel Xeon",status:"available",pricePerHour:.12,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"azure-eastus-intel-xeon-x4",accelerator:"cpu",numAccelerators:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-xeon",instanceSize:"x4",architecture:"Intel Xeon",status:"available",pricePerHour:.24,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"azure-eastus-intel-xeon-x8",accelerator:"cpu",numAccelerators:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-xeon",instanceSize:"x8",architecture:"Intel Xeon",status:"available",pricePerHour:.48,quota:{maxAccelerators:0,usedAccelerators:0}}]},{name:"germanywestcentral",label:"Germany West Central",status:"not_available",computes:[]},{name:"westus2",label:"West US 2",status:"not_available",computes:[]},{name:"chinanorth3",label:"China North 3",status:"not_available",computes:[]}]},{name:"gcp",status:"available",regions:[{name:"us-east4",label:"US East 4",status:"available",computes:[{id:"gcp-us-east4-intel-spr-x1",accelerator:"cpu",numAccelerators:1,numCpus:1,memoryGb:2,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x1",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.05,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-intel-spr-x2",accelerator:"cpu",numAccelerators:2,numCpus:2,memoryGb:4,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x2",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.1,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-intel-spr-x4",accelerator:"cpu",numAccelerators:4,numCpus:4,memoryGb:8,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x4",architecture:"Intel Sapphire 
Rapids",status:"available",pricePerHour:.2,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-intel-spr-x8",accelerator:"cpu",numAccelerators:8,numCpus:8,memoryGb:16,gpuMemoryGb:null,instanceType:"intel-spr",instanceSize:"x8",architecture:"Intel Sapphire Rapids",status:"available",pricePerHour:.4,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-t4-x1",accelerator:"gpu",numAccelerators:1,numCpus:3,memoryGb:11,gpuMemoryGb:16,instanceType:"nvidia-t4",instanceSize:"x1",architecture:"Nvidia T4",status:"available",pricePerHour:.5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-l4-x1",accelerator:"gpu",numAccelerators:1,numCpus:3,memoryGb:12,gpuMemoryGb:24,instanceType:"nvidia-l4",instanceSize:"x1",architecture:"Nvidia L4",status:"available",pricePerHour:.7,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-l4-x4",accelerator:"gpu",numAccelerators:4,numCpus:46,memoryGb:186,gpuMemoryGb:96,instanceType:"nvidia-l4",instanceSize:"x4",architecture:"Nvidia L4",status:"available",pricePerHour:3.8,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-a100-x1",accelerator:"gpu",numAccelerators:1,numCpus:11,memoryGb:174,gpuMemoryGb:80,instanceType:"nvidia-a100",instanceSize:"x1",architecture:"Nvidia A100",status:"available",pricePerHour:3.6,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-a100-x2",accelerator:"gpu",numAccelerators:2,numCpus:23,memoryGb:348,gpuMemoryGb:160,instanceType:"nvidia-a100",instanceSize:"x2",architecture:"Nvidia A100",status:"available",pricePerHour:7.2,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-a100-x4",accelerator:"gpu",numAccelerators:4,numCpus:47,memoryGb:696,gpuMemoryGb:320,instanceType:"nvidia-a100",instanceSize:"x4",architecture:"Nvidia 
A100",status:"available",pricePerHour:14.4,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-h100-x1",accelerator:"gpu",numAccelerators:1,numCpus:25,memoryGb:240,gpuMemoryGb:80,instanceType:"nvidia-h100",instanceSize:"x1",architecture:"Nvidia H100",status:"available",pricePerHour:10,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-h100-x2",accelerator:"gpu",numAccelerators:2,numCpus:51,memoryGb:480,gpuMemoryGb:160,instanceType:"nvidia-h100",instanceSize:"x2",architecture:"Nvidia H100",status:"available",pricePerHour:20,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-east4-nvidia-h100-x4",accelerator:"gpu",numAccelerators:4,numCpus:102,memoryGb:960,gpuMemoryGb:320,instanceType:"nvidia-h100",instanceSize:"x4",architecture:"Nvidia H100",status:"available",pricePerHour:40,quota:{maxAccelerators:0,usedAccelerators:0}}]},{name:"us-west1",label:"US West 1",status:"available",computes:[{id:"gcp-us-west1-v5e-2x4",accelerator:"tpu",numAccelerators:8,numCpus:220,memoryGb:380,gpuMemoryGb:128,instanceType:"v5e",instanceSize:"2x4",architecture:"Google Cloud TPU",status:"available",pricePerHour:9.5,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-west1-v5e-2x2",accelerator:"tpu",numAccelerators:4,numCpus:110,memoryGb:186,gpuMemoryGb:64,instanceType:"v5e",instanceSize:"2x2",architecture:"Google Cloud TPU",status:"available",pricePerHour:4.75,quota:{maxAccelerators:0,usedAccelerators:0}},{id:"gcp-us-west1-v5e-1x1",accelerator:"tpu",numAccelerators:1,numCpus:22,memoryGb:42,gpuMemoryGb:16,instanceType:"v5e",instanceSize:"1x1",architecture:"Google Cloud TPU",status:"available",pricePerHour:1.2,quota:{maxAccelerators:0,usedAccelerators:0}}]}]}]},"uses":{"search_params":["admin"],"params":["namespace"]}},{"type":"data","data":(function(a,b,c,d,e,f,g,h,i,j,k){a.maxBatchPrefillTokens=void 0;a.maxBatchTotalTokens=void 0;a.maxInputLength=void 0;a.maxTotalTokens=void 
0;a.type="tgi";b[0]="nvidia-t4";c.minMemoryGb=60;d[0]="nvidia-t4";e.minMemoryGb=192;f.minMemoryGb=384;g[0]="nvidia-t4";h.minMemoryGb=48;i.minMemoryGb=48;j.minMemoryGb=24;k.minMemoryGb=16;return {account:void 0,selectedCatalogItems:[{containersConfig:{tgi:a},creatorType:"org",dontShowInCatalog:true,license:"gemma",modelName:"gemma-2-27b-it",namespace:"google",repoName:"google/gemma-2-27b-it",requirements:{bannedInstanceTypes:b,cpu:void 0,gpu:c,tpu:void 0,neuron:void 0},requirementsConfig:{bannedInstanceTypes:b,cpu:void 0,gpu:c},skipModelChecks:void 0,task:"text-generation"},{containersConfig:{tgi:a},creatorType:"org",dontShowInCatalog:false,license:"llama3.1",modelName:"Llama-3.1-70B-Instruct",namespace:"meta-llama",repoName:"meta-llama/Llama-3.1-70B-Instruct",requirements:{bannedInstanceTypes:d,cpu:void 0,gpu:e,tpu:void 0,neuron:f},requirementsConfig:{bannedInstanceTypes:d,cpu:void 0,gpu:e,neuron:f},skipModelChecks:void 0,task:"text-generation"},{containersConfig:{tgi:a},creatorType:"org",dontShowInCatalog:false,license:"apache-2.0",modelName:"Qwen2.5-Coder-7B-Instruct",namespace:"Qwen",repoName:"Qwen/Qwen2.5-Coder-7B-Instruct",requirements:{bannedInstanceTypes:g,cpu:void 0,gpu:h,tpu:void 0,neuron:void 0},requirementsConfig:{bannedInstanceTypes:g,cpu:void 0,gpu:h},skipModelChecks:void 0,task:"text-generation"},{containersConfig:void 0,creatorType:"org",dontShowInCatalog:false,license:"mit",modelName:"FLUX.1-schnell",namespace:"black-forest-labs",repoName:"black-forest-labs/FLUX.1-schnell",requirements:{bannedInstanceTypes:void 0,cpu:void 0,gpu:i,tpu:void 0,neuron:void 0},requirementsConfig:{cpu:void 0,gpu:i},skipModelChecks:void 0,task:"text-to-image"},{containersConfig:{tei:{maxBatchTokens:16384,maxConcurrentRequests:512,pooling:void 0,type:"tei"}},creatorType:"org",dontShowInCatalog:false,license:"apache-2.0",modelName:"mxbai-embed-large-v1",namespace:"mixedbread-ai",repoName:"mixedbread-ai/mxbai-embed-large-v1",requirements:{bannedInstanceTypes:void 0,cpu:void 
0,gpu:j,tpu:void 0,neuron:void 0},requirementsConfig:{cpu:void 0,gpu:j},skipModelChecks:void 0,task:"sentence-embeddings"},{containersConfig:void 0,creatorType:"org",dontShowInCatalog:false,license:"mit",modelName:"whisper-large-v3-turbo",namespace:"openai",repoName:"openai/whisper-large-v3-turbo",requirements:{bannedInstanceTypes:void 0,cpu:void 0,gpu:k,tpu:void 0,neuron:void 0},requirementsConfig:{cpu:void 0,gpu:k},skipModelChecks:void 0,task:"automatic-speech-recognition"}],structuredData:{"@context":"https://schema.org/","@type":"Product",name:"Inference Endpoints by Hugging Face",description:"Deploy popular AI models in minutes using Inference Endpoint.",review:[{"@type":"Review",name:"Inference Endpoint review",author:{"@type":"Person",name:"Bryce Harlan"},positiveNotes:{"@type":"ItemList",itemListElement:[{"@type":"ListItem",position:1,name:"It took off a week's worth of developer time."}]}}]},user:void 0}}({},Array(1),{},Array(1),{},{},Array(1),{},{},{},{})),"uses":{"parent":1}}]; Promise.all([ import("/_app/immutable/entry/start.qIDgLonL.js"), import("/_app/immutable/entry/app.D-INM9aF.js") ]).then(([kit, app]) => { kit.start(app, element, { node_ids: [0, 11], data, form: null, error: null }); }); } </script> </div> </body> </html>