CINXE.COM

<!doctype html> <html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-getting-started" data-has-hydrated="false"> <head> <meta charset="UTF-8"> <meta name="generator" content="Docusaurus v3.7.0"> <title data-rh="true">Getting started | promptfoo</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://www.promptfoo.dev/img/thumbnail.png"><meta data-rh="true" name="twitter:image" content="https://www.promptfoo.dev/img/thumbnail.png"><meta data-rh="true" property="og:url" content="https://www.promptfoo.dev/docs/getting-started/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Getting started | promptfoo"><meta data-rh="true" name="description" content="Install promptfoo and set up your first config file by running this command with npx, npm, or brew:"><meta data-rh="true" property="og:description" content="Install promptfoo and set up your first config file by running this command with npx, npm, or brew:"><link data-rh="true" rel="icon" href="/favicon.ico"><link data-rh="true" rel="canonical" href="https://www.promptfoo.dev/docs/getting-started/"><link data-rh="true" rel="alternate" href="https://www.promptfoo.dev/docs/getting-started/" hreflang="en"><link data-rh="true" rel="alternate" href="https://www.promptfoo.dev/docs/getting-started/" hreflang="x-default"><link data-rh="true" rel="preconnect" href="https://VPUDC1V4TA-dsn.algolia.net" crossorigin="anonymous"><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="promptfoo RSS Feed"> <link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="promptfoo Atom Feed"> <link rel="preconnect" href="https://www.google-analytics.com"> <link rel="preconnect" href="https://www.googletagmanager.com"> <script async src="https://www.googletagmanager.com/gtag/js?id=G-3TS8QLZQ93"></script> <script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-3TS8QLZQ93",{anonymize_ip:!0})</script> <link rel="search" type="application/opensearchdescription+xml" title="promptfoo" href="/opensearch.xml"> <link rel="preconnect" href="https://fonts.googleapis.com"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin="true"> <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&amp;display=swap"> <script src="/js/scripts.js" async></script><link rel="stylesheet" href="/assets/css/styles.f7e63d14.css"> <script src="/assets/js/runtime~main.de6c1930.js" defer="defer"></script> <script src="/assets/js/main.7055da6b.js" defer="defer"></script> </head> <body class="navigation-with-keyboard"> <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const n=new URLSearchParams(window.location.search).entries();for(var[t,e]of n)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_oPtH" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/logo-panda.svg" alt="promptfoo logo" class="themedComponent_siVc themedComponent--light_hHel"><img src="/img/logo-panda.svg" alt="promptfoo logo" class="themedComponent_siVc themedComponent--dark_yETr"></div><b class="navbar__title text--truncate">promptfoo</b></a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Product</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/security/">Gen AI Security</a></li><li><a class="dropdown__link" href="/llm-vulnerability-scanner/">Vulnerability Scanner</a></li><li><a aria-current="page" class="dropdown__link dropdown__link--active" href="/docs/getting-started/">LLM Evaluations</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Company</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/about/">About</a></li><li><a class="dropdown__link" href="/blog/">Blog</a></li><li><a class="dropdown__link" href="/press/">Press</a></li><li><a class="dropdown__link" href="/contact/">Contact</a></li><li><a class="dropdown__link" href="/careers/">Careers</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Resources</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/docs/intro/">Docs</a></li><li><a href="https://www.promptfoo.dev/models/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Foundation Model Reports<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://github.com/promptfoo/promptfoo" target="_blank" rel="noopener noreferrer" class="dropdown__link">GitHub<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li><a href="https://discord.gg/promptfoo" target="_blank" rel="noopener noreferrer" class="dropdown__link">Discord<svg width="12" height="12" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><a class="navbar__item navbar__link" href="/pricing/">Enterprise</a></div><div class="navbar__items navbar__items--right"><a href="https://github.com/promptfoo/promptfoo" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"></a><a href="https://discord.gg/promptfoo" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-discord-link" aria-label="Discord community"></a><div class="toggle_ki11 colorModeToggle_Hewu"><button class="clean-btn toggleButton_MMFG toggleButtonDisabled_Uw7m" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite" aria-pressed="false"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_lgto"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_U96C"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_bzqh"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search (Command+K)"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_MB5r"><div class="docsWrapper__sE8"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_iEvu" type="button"></button><div class="docRoot_DfVB"><aside class="theme-doc-sidebar-container docSidebarContainer_c7NB"><div class="sidebarViewport_KYo0"><div class="sidebar_CUen"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_jmj1"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/intro/">Intro</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/installation/">Installation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" href="/docs/getting-started/">Getting started</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/cloud/">Promptfoo Cloud</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/category/configuration/">Configuration</a><button aria-label="Collapse sidebar category &#x27;Configuration&#x27;" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/guide/">Guide</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/reference/">Reference</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/parameters/">Prompts, tests, and outputs</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/chat/">Chat threads</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/datasets/">Dataset generation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/scenarios/">Scenarios</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/caching/">Caching</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/configuration/telemetry/">Telemetry</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/category/usage/">Usage</a><button aria-label="Expand sidebar category &#x27;Usage&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/configuration/expected-outputs/">Assertions &amp; metrics</a><button aria-label="Expand sidebar category &#x27;Assertions &amp; metrics&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/providers/">Providers</a><button aria-label="Expand sidebar category &#x27;Providers&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/category/integrations/">Integrations</a><button aria-label="Expand sidebar category &#x27;Integrations&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/category/red-teaming/">Red teaming</a><button aria-label="Collapse sidebar category &#x27;Red teaming&#x27;" aria-expanded="true" type="button" class="clean-btn menu__caret"></button></div><ul style="display:block;overflow:visible;height:auto" class="menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/">Intro</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/llm-vulnerability-types/">Types of LLM vulnerabilities</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/quickstart/">Quickstart</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/configuration/">Configuration</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/architecture/">Architecture</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/owasp-llm-top-10/">OWASP LLM Top 10</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/guardrails/">Guardrails</a></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" tabindex="0" href="/docs/red-team/plugins/">Plugins</a><button aria-label="Expand sidebar category &#x27;Plugins&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" tabindex="0" href="/docs/red-team/strategies/">Strategies</a><button aria-label="Expand sidebar category &#x27;Strategies&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-2 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" tabindex="0" href="/docs/red-team/troubleshooting/overview/">Troubleshooting</a><button aria-label="Expand sidebar category &#x27;Troubleshooting&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/foundation-models/">How to Red Team Foundation Models</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/rag/">How to red team RAG applications</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/red-team/agents/">How to red team LLM Agents</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-2 menu__list-item"><a class="menu__link" tabindex="0" href="/docs/guides/llm-redteaming/">How to red team LLM applications</a></li></ul></li><li class="theme-doc-sidebar-item-category theme-doc-sidebar-item-category-level-1 menu__list-item menu__list-item--collapsed"><div class="menu__list-item-collapsible"><a class="menu__link menu__link--sublist" href="/docs/category/guides/">Guides</a><button aria-label="Expand sidebar category &#x27;Guides&#x27;" aria-expanded="false" type="button" class="clean-btn menu__caret"></button></div></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/contributing/">Contributing</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/faq/">FAQ</a></li></ul></nav></div></div></aside><main class="docMainContainer_a9sJ"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_Qr34"><div class="docItemContainer_tjFy"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_T5ub" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_sfvy"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Getting started</span><meta itemprop="position" content="1"></li></ul></nav><div class="tocCollapsible_wXna theme-doc-toc-mobile tocMobile_Ojys"><button type="button" class="clean-btn tocCollapsibleButton_iI2p">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Getting started</h1></header> <p>Install promptfoo and set up your first config file by running this command with <a href="https://nodejs.org/en/download" target="_blank" rel="noopener noreferrer">npx</a>, <a href="https://nodejs.org/en/download" target="_blank" rel="noopener noreferrer">npm</a>, or <a href="https://brew.sh/" target="_blank" rel="noopener noreferrer">brew</a>:</p> <div class="tabs-container tabList_J5MA"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_l0OV tabs__item--active">npx</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">npm</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">brew</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_wHwb"><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>npx promptfoo@latest init --example getting-started</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><div class="language-bash codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-bash codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">npm</span><span class="token plain"> </span><span class="token function" style="color:#d73a49">install</span><span class="token plain"> </span><span class="token parameter variable" style="color:#36acaa">-g</span><span class="token plain"> promptfoo</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">promptfoo init </span><span class="token parameter variable" style="color:#36acaa">--example</span><span class="token plain"> getting-started</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><div class="language-bash codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-bash codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token plain">brew </span><span class="token function" style="color:#d73a49">install</span><span class="token plain"> promptfoo</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">promptfoo init </span><span class="token parameter variable" style="color:#36acaa">--example</span><span class="token plain"> getting-started</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></div></div> <p>This will create a new directory with a basic example that tests translation prompts across different models. The example includes:</p> <ul> <li>A configuration file <code>promptfooconfig.yaml</code> with sample prompts, providers, and test cases.</li> <li>A <code>README.md</code> file explaining how the example works.</li> </ul> <p>If you prefer to start from scratch instead of using the example, simply run <code>promptfoo init</code> without the <code>--example</code> flag. The command will guide you through an interactive setup process.</p> <h2 class="anchor anchorWithStickyNavbar_JmGV" id="configuration">Configuration<a href="#configuration" class="hash-link" aria-label="Direct link to Configuration" title="Direct link to Configuration">​</a></h2> <p>Next, we can review the example configuration file and make changes to it.</p> <ol> <li> <p><strong>Set up your prompts</strong>: Open <code>promptfooconfig.yaml</code> and add prompts that you want to test. Use double curly braces as placeholders for variables: <code>{{variable_name}}</code>. For example:</p> <div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">prompts</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&#x27;Convert this English to {{language}}: {{input}}&#x27;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&#x27;Translate to {{language}}: {{input}}&#x27;</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p><a href="/docs/configuration/parameters/">» More information on setting up prompts</a></p> </li> <li> <p>Add <code>providers</code> to specify which AI models you want to test. promptfoo supports 50+ providers including OpenAI, Anthropic, Google, and many others:</p> <div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">providers</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> openai</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">o3</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">mini</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> anthropic</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">messages</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">claude</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">3</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">5</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">sonnet</span><span class="token punctuation" style="color:#393A34">-</span><span class="token number" style="color:#36acaa">20241022</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> vertex</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">gemini</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">pro</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># Or use your own custom provider</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//path/to/custom/provider.py</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>Each provider is specified using a simple format: <code>provider_name:model_name</code>. For example:</p> <ul> <li><code>openai:o3-mini</code> for OpenAI&#x27;s o3-mini</li> <li><code>anthropic:messages:claude-3-5-sonnet-20241022</code> for Anthropic&#x27;s Claude</li> <li><code>bedrock:us.meta.llama3-3-70b-instruct-v1:0</code> for Meta&#x27;s Llama 3.3 70B via AWS Bedrock</li> </ul> <p>Most providers need authentication. For example, with OpenAI:</p> <div class="language-sh codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-sh codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token builtin class-name">export</span><span class="token plain"> </span><span class="token assign-left variable" style="color:#36acaa">OPENAI_API_KEY</span><span class="token operator" style="color:#393A34">=</span><span class="token plain">sk-abc123</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>You can use:</p> <ul> <li><strong>Cloud APIs</strong>: <a href="/docs/providers/openai/">OpenAI</a>, <a href="/docs/providers/anthropic/">Anthropic</a>, <a href="/docs/providers/google/">Google</a>, <a href="/docs/providers/mistral/">Mistral</a>, and <a href="/docs/providers/">many more</a></li> <li><strong>Local Models</strong>: <a href="/docs/providers/ollama/">Ollama</a>, <a href="/docs/providers/llama.cpp/">llama.cpp</a>, <a href="/docs/providers/localai/">LocalAI</a></li> <li><strong>Custom Code</strong>: <a href="/docs/providers/python/">Python</a>, <a href="/docs/providers/custom-api/">JavaScript</a>, or any <a href="/docs/providers/custom-script/">executable</a></li> </ul> <p><a href="/docs/providers/">» See our full providers documentation</a> for detailed setup instructions for each provider.</p> </li> <li> <p><strong>Add test inputs</strong>: Add some example inputs for your prompts. Optionally, add <a href="/docs/configuration/expected-outputs/">assertions</a> to set output requirements that are checked automatically.</p> <p>For example:</p> <div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">tests</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">language</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> French</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">input</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Hello world</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">language</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Spanish</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">input</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Where is the library</span><span class="token punctuation" style="color:#393A34">?</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>When writing test cases, think of core use cases and potential failures that you want to make sure your prompts handle correctly.</p> <p><a href="/docs/configuration/guide/">» More information on setting up tests</a></p> </li> <li> <p><strong>Run the evaluation</strong>: Make sure you&#x27;re in the directory containing <code>promptfooconfig.yaml</code>, then run:</p> <div class="tabs-container tabList_J5MA"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_l0OV tabs__item--active">npx</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">npm</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">brew</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_wHwb"><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>npx promptfoo@latest eval</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval</p></code></pre></div></div></div> <p>This tests every prompt, model, and test case.</p> </li> <li> <p>After the evaluation is complete, open the web viewer to review the outputs:</p> <div class="tabs-container tabList_J5MA"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_l0OV tabs__item--active">npx</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">npm</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">brew</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_wHwb"><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>npx promptfoo@latest view</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo view</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo view</p></code></pre></div></div></div> </li> </ol> <p><img decoding="async" loading="lazy" alt="Promptfoo Web UI showing evaluation results" src="/assets/images/getting-started-web-ui-5343b86f51320bff5b66addbcd3aad71.png" width="3456" height="1456" class="img_SS3x"></p> <h3 class="anchor anchorWithStickyNavbar_JmGV" id="configuration-1">Configuration<a href="#configuration-1" class="hash-link" aria-label="Direct link to Configuration" title="Direct link to Configuration">​</a></h3> <p>The YAML configuration format runs each prompt through a series of example inputs (aka &quot;test case&quot;) and checks if they meet requirements (aka &quot;assert&quot;).</p> <p>Asserts are <em>optional</em>. Many people get value out of reviewing outputs manually, and the web UI helps facilitate this.</p> <div class="theme-admonition theme-admonition-tip admonition_WCGJ alert alert--success"><div class="admonitionHeading_GCBg"><span class="admonitionIcon_L39b"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>tip</div><div class="admonitionContent_pbrs"><p>See the <a href="/docs/configuration/guide/">Configuration docs</a> for a detailed guide.</p></div></div> <details class="details_IpIu alert alert--info details_jERq" data-collapsed="true"><summary>Show example YAML</summary><div><div class="collapsibleContent_Fd2D"><div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockTitle_x_ju">promptfooconfig.yaml</div><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token comment" style="color:#999988;font-style:italic"># yaml-language-server: $schema=https://promptfoo.dev/config-schema.json</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">prompts</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//prompts.txt</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">providers</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> openai</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">gpt</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">4o</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">mini</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">tests</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">description</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> First test case </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> automatic review</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var1</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> first variable&#x27;s value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var2</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> another value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var3</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> some other value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">assert</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> equals</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> expected LLM output goes here</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> javascript</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> output.includes(&#x27;some text&#x27;)</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">description</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Second test case </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> manual review</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># Test cases don&#x27;t need assertions if you prefer to review the output yourself</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var1</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> new value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var2</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> another value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var3</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> third value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">description</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Third test case </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> other types of automatic review</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var1</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> yet another value</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var2</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> and another</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">var3</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> dear llm</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> please output your response in json format</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">assert</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> contains</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">json</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> similar</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> ensures that output is semantically similar to this text</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> llm</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">rubric</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> must contain a reference to X</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></div></details> <h2 class="anchor anchorWithStickyNavbar_JmGV" id="examples">Examples<a href="#examples" class="hash-link" aria-label="Direct link to Examples" title="Direct link to Examples">​</a></h2> <h3 class="anchor anchorWithStickyNavbar_JmGV" id="prompt-quality">Prompt quality<a href="#prompt-quality" class="hash-link" aria-label="Direct link to Prompt quality" title="Direct link to Prompt quality">​</a></h3> <p>In <a href="https://github.com/promptfoo/promptfoo/tree/main/examples/self-grading" target="_blank" rel="noopener noreferrer">this example</a>, we evaluate whether adding adjectives to the personality of an assistant bot affects the responses.</p> <p>Here is the configuration:</p> <div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockTitle_x_ju">promptfooconfig.yaml</div><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token comment" style="color:#999988;font-style:italic"># yaml-language-server: $schema=https://promptfoo.dev/config-schema.json</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token comment" style="color:#999988;font-style:italic"># Load prompts</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">prompts</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//prompt1.txt</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//prompt2.txt</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token comment" style="color:#999988;font-style:italic"># Set an LLM</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">providers</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> openai</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">gpt</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">4o</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">mini</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token comment" style="color:#999988;font-style:italic"># These test properties are applied to every test</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">defaultTest</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">assert</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># Verify that the output doesn&#x27;t contain &quot;AI language model&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> not</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">contains</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> AI language model</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># Verify that the output doesn&#x27;t apologize</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> llm</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">rubric</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> must not contain an apology</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># Prefer shorter outputs using a scoring function</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> javascript</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Math.max(0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> Math.min(1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> 1 </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> (output.length </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> 100) / 900));</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token comment" style="color:#999988;font-style:italic"># Set up individual test cases</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">tests</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">name</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Bob</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">question</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Can you help me find a specific product on your website</span><span class="token punctuation" style="color:#393A34">?</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">assert</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> contains</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> search</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">name</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Jane</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">question</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Do you have any promotions or discounts currently available</span><span class="token punctuation" style="color:#393A34">?</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">assert</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">type</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> starts</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">with</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">value</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Yes</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">vars</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">name</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Ben</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token key atrule" style="color:#00a4db">question</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"> Can you check the availability of a product at a specific store location</span><span class="token punctuation" style="color:#393A34">?</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic"># ...</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>A simple <code>npx promptfoo@latest eval</code> will run this example from the command line:</p> <p><img decoding="async" loading="lazy" src="https://user-images.githubusercontent.com/310310/244891726-480e1114-d049-40b9-bd5f-f81c15060284.gif" alt="promptfoo command line" class="img_SS3x"></p> <p>This command will evaluate the prompts, substituting variable values, and output the results in your terminal.</p> <p>Have a look at the setup and full output <a href="https://github.com/promptfoo/promptfoo/tree/main/examples/self-grading" target="_blank" rel="noopener noreferrer">here</a>.</p> <p>You can also output a nice <a href="https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing" target="_blank" rel="noopener noreferrer">spreadsheet</a>, <a href="https://github.com/promptfoo/promptfoo/blob/main/examples/simple-cli/output.json" target="_blank" rel="noopener noreferrer">JSON</a>, YAML, or an HTML file:</p> <div class="tabs-container tabList_J5MA"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_l0OV tabs__item--active">npx</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">npm</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">brew</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_wHwb"><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>npx promptfoo@latest eval -o output.html</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval -o output.html</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval -o output.html</p></code></pre></div></div></div> <p><img decoding="async" loading="lazy" src="https://user-images.githubusercontent.com/310310/235483444-4ddb832d-e103-4b9c-a862-b0d6cc11cdc0.png" alt="Table output" class="img_SS3x"></p> <h3 class="anchor anchorWithStickyNavbar_JmGV" id="model-quality">Model quality<a href="#model-quality" class="hash-link" aria-label="Direct link to Model quality" title="Direct link to Model quality">​</a></h3> <p>In <a href="https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-4o-vs-4o-mini" target="_blank" rel="noopener noreferrer">this next example</a>, we evaluate the difference between GPT 4o and GPT 4o mini outputs for a given prompt:</p> <div class="language-yaml codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_D5yF"><pre tabindex="0" class="prism-code language-yaml codeBlock_RMoD thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_AclH"><span class="token-line" style="color:#393A34"><span class="token key atrule" style="color:#00a4db">prompts</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//prompt1.txt</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> file</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">//prompt2.txt</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token comment" style="color:#999988;font-style:italic"># Set the LLMs we want to test</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token key atrule" style="color:#00a4db">providers</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> openai</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">gpt</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">4o</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">mini</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain"> openai</span><span class="token punctuation" style="color:#393A34">:</span><span class="token plain">gpt</span><span class="token punctuation" style="color:#393A34">-</span><span class="token plain">4o</span><br></span></code></pre><div class="buttonGroup_aaMX"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_z5j7" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_FoOz"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_L0B6"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>A simple <code>npx promptfoo@latest eval</code> will run the example. Also note that you can override parameters directly from the command line. For example, this command:</p> <div class="tabs-container tabList_J5MA"><ul role="tablist" aria-orientation="horizontal" class="tabs"><li role="tab" tabindex="0" aria-selected="true" class="tabs__item tabItem_l0OV tabs__item--active">npx</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">npm</li><li role="tab" tabindex="-1" aria-selected="false" class="tabs__item tabItem_l0OV">brew</li></ul><div class="margin-top--md"><div role="tabpanel" class="tabItem_wHwb"><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>npx promptfoo@latest eval -p prompts.txt -r openai<!-- -->:gpt-4o-mini<!-- --> openai<!-- -->:gpt-4o<!-- --> -o output.html</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval -p prompts.txt -r openai<!-- -->:gpt-4o-mini<!-- --> openai<!-- -->:gpt-4o<!-- --> -o output.html</p></code></pre></div><div role="tabpanel" class="tabItem_wHwb" hidden=""><pre tabindex="0" class="codeBlockStandalone_wQog thin-scrollbar codeBlockContainer_mQmQ theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><code class="codeBlockLines_AclH"><p>promptfoo eval -p prompts.txt -r openai<!-- -->:gpt-4o-mini<!-- --> openai<!-- -->:gpt-4o<!-- --> -o output.html</p></code></pre></div></div></div> <p>Produces this HTML table:</p> <p><img decoding="async" loading="lazy" src="https://user-images.githubusercontent.com/310310/235490527-e0c31f40-00a0-493a-8afc-8ed6322bb5ca.png" alt="Side-by-side evaluation of LLM model quality, gpt-4o vs gpt-4o-mini, html output" class="img_SS3x"></p> <p>Full setup and output <a href="https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-4o-vs-4o-mini" target="_blank" rel="noopener noreferrer">here</a>.</p> <p>A similar approach can be used to run other model comparisons. For example, you can:</p> <ul> <li>Compare same models with different temperatures (see <a href="https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-4o-temperature-comparison" target="_blank" rel="noopener noreferrer">GPT temperature comparison</a>)</li> <li>Compare Llama vs. GPT (see <a href="/docs/guides/compare-llama2-vs-gpt/">Llama vs GPT benchmark</a>)</li> <li>Compare Retrieval-Augmented Generation (RAG) with LangChain vs. regular GPT-4 (see <a href="/docs/configuration/testing-llm-chains/">LangChain example</a>)</li> </ul> <h2 class="anchor anchorWithStickyNavbar_JmGV" id="other-examples">Other examples<a href="#other-examples" class="hash-link" aria-label="Direct link to Other examples" title="Direct link to Other examples">​</a></h2> <p>There are many examples available in the <a href="https://github.com/promptfoo/promptfoo/tree/main/examples" target="_blank" rel="noopener noreferrer"><code>examples/</code></a> directory of our Github repository.</p> <h2 class="anchor anchorWithStickyNavbar_JmGV" id="automatically-assess-outputs">Automatically assess outputs<a href="#automatically-assess-outputs" class="hash-link" aria-label="Direct link to Automatically assess outputs" title="Direct link to Automatically assess outputs">​</a></h2> <p>The above examples create a table of outputs that can be manually reviewed. By setting up assertions, you can automatically grade outputs on a pass/fail basis.</p> <p>For more information on automatically assessing outputs, see <a href="/docs/configuration/expected-outputs/">Assertions &amp; Metrics</a>.</p></div><footer class="theme-doc-footer docusaurus-mt-lg"><div class="row margin-top--sm theme-doc-footer-edit-meta-row"><div class="col"><a href="https://github.com/promptfoo/promptfoo/tree/main/site/docs/getting-started.md" target="_blank" rel="noopener noreferrer" class="theme-edit-this-page"><svg fill="currentColor" height="20" width="20" viewBox="0 0 40 40" class="iconEdit_bHB7" aria-hidden="true"><g><path d="m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"></path></g></svg>Edit this page</a></div><div class="col lastUpdated_ydrU"></div></div></footer></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/installation/"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">Installation</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/cloud/"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Promptfoo Cloud</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_XG6w thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#configuration" class="table-of-contents__link toc-highlight">Configuration</a><ul><li><a href="#configuration-1" class="table-of-contents__link toc-highlight">Configuration</a></li></ul></li><li><a href="#examples" class="table-of-contents__link toc-highlight">Examples</a><ul><li><a href="#prompt-quality" class="table-of-contents__link toc-highlight">Prompt quality</a></li><li><a href="#model-quality" class="table-of-contents__link toc-highlight">Model quality</a></li></ul></li><li><a href="#other-examples" class="table-of-contents__link toc-highlight">Other examples</a></li><li><a href="#automatically-assess-outputs" class="table-of-contents__link toc-highlight">Automatically assess outputs</a></li></ul></div></div></div></div></main></div></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Product</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/intro/">Docs</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/getting-started/">Command Line</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/usage/node-package/">Node Package</a></li><li class="footer__item"><a class="footer__link-item" href="/pricing/">Enterprise</a></li><li class="footer__item"><a href="https://status.promptfoo.dev" target="_blank" rel="noopener noreferrer" class="footer__link-item">Status<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Resources</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/red-team/">LLM Red Teaming</a></li><li class="footer__item"><a href="https://www.promptfoo.dev/models/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Foundation Model Reports</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/guides/llama2-uncensored-benchmark-ollama/">Running Benchmarks</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/guides/factuality-eval/">Evaluating Factuality</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/guides/evaluate-rag/">Evaluating RAGs</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/guides/prevent-llm-hallucations/">Minimizing Hallucinations</a></li><li class="footer__item"><a class="footer__link-item" href="/validator/">Config Validator</a></li></ul></div><div class="col footer__col"><div class="footer__title">Company</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/about/">About</a></li><li class="footer__item"><a class="footer__link-item" href="/blog/">Blog</a></li><li class="footer__item"><a class="footer__link-item" href="/press/">Press</a></li><li class="footer__item"><a class="footer__link-item" href="/contact/">Contact</a></li><li class="footer__item"><a class="footer__link-item" href="/careers/">Careers</a></li></ul></div><div class="col footer__col"><div class="footer__title">Legal &amp; Social</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/promptfoo/promptfoo" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://discord.gg/promptfoo" target="_blank" rel="noopener noreferrer" class="footer__link-item">Discord<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/promptfoo/" target="_blank" rel="noopener noreferrer" class="footer__link-item">LinkedIn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPrP"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/privacy/">Privacy Policy</a></li><li class="footer__item"><a class="footer__link-item" href="/terms-of-service/">Terms of Service</a></li><li class="footer__item"> <div style="position: relative; margin-top:8px"> <span style="position: absolute; left: 65px; top: 25px; font-size: 10px; font-weight: bold; background-color: #25842c; padding: 2px 4px; border-radius: 4px;">In Progress</span> <img loading="lazy" src="/img/badges/soc2.png" alt="SOC2 Compliance in progress" style="width:80px; height: auto"> </div> </li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">© 2025 promptfoo</div></div></div></footer></div> <!-- Cloudflare Pages Analytics --><script defer src='https://static.cloudflareinsights.com/beacon.min.js' data-cf-beacon='{"token": "1c4bd5e1107e49379a47b948d21d50e1"}'></script><!-- Cloudflare Pages Analytics --></body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10