CINXE.COM

ORPO Trainer

<!doctype html> <html class=""> <head> <meta charset="utf-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" /> <meta name="description" content="We’re on a journey to advance and democratize artificial intelligence through open source and open science." /> <meta property="fb:app_id" content="1321688464574422" /> <meta name="twitter:card" content="summary_large_image" /> <meta name="twitter:site" content="@huggingface" /> <meta name="twitter:image" content="https://huggingface.co/front/thumbnails/docs/trl.png" /> <meta property="og:title" content="ORPO Trainer" /> <meta property="og:type" content="website" /> <meta property="og:url" content="https://huggingface.co/docs/trl/orpo_trainer" /> <meta property="og:image" content="https://huggingface.co/front/thumbnails/docs/trl.png" /> <link rel="stylesheet" href="/front/build/kube-9063520/style.css" /> <link rel="preconnect" href="https://fonts.gstatic.com" /> <link href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:ital,wght@0,200;0,300;0,400;0,600;0,700;0,900;1,200;1,300;1,400;1,600;1,700;1,900&display=swap" rel="stylesheet" /> <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&display=swap" rel="stylesheet" /> <link rel="preload" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css" as="style" onload="this.onload=null;this.rel='stylesheet'" /> <noscript> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.12.0/katex.min.css" /> </noscript> <script>const guestTheme = document.cookie.match(/theme=(\w+)/)?.[1]; document.documentElement.classList.toggle('dark', guestTheme === 'dark' || ( (!guestTheme || guestTheme === 'system') && window.matchMedia('(prefers-color-scheme: dark)').matches));</script> <link rel="canonical" href="https://huggingface.co/docs/trl/orpo_trainer"> <link rel="alternate" hreflang="en" href="https://huggingface.co/docs/trl/en/orpo_trainer"> <link rel="alternate" hreflang="x-default" href="https://huggingface.co/docs/trl/orpo_trainer"> <title>ORPO Trainer</title> <script defer data-domain="huggingface.co" event-loggedIn="false" src="/js/script.pageview-props.js" ></script> <script> window.plausible = window.plausible || function () { (window.plausible.q = window.plausible.q || []).push(arguments); }; </script> <script> window.hubConfig = {"features":{"signupDisabled":false},"sshGitUrl":"git@hf.co","moonHttpUrl":"https:\/\/huggingface.co","captchaApiKey":"bd5f2066-93dc-4bdd-a64b-a24646ca3859","captchaDisabledOnSignup":true,"datasetViewerPublicUrl":"https:\/\/datasets-server.huggingface.co","stripePublicKey":"pk_live_x2tdjFXBCvXo2FFmMybezpeM00J6gPCAAc","environment":"production","userAgent":"HuggingFace (production)","spacesIframeDomain":"hf.space","spacesApiUrl":"https:\/\/api.hf.space","docSearchKey":"ece5e02e57300e17d152c08056145326e90c4bff3dd07d7d1ae40cf1c8d39cb6","logoDev":{"apiUrl":"https:\/\/img.logo.dev\/","apiKey":"pk_UHS2HZOeRnaSOdDp7jbd5w"}}; </script> <script type="text/javascript" src="https://de5282c3ca0c.edge.sdk.awswaf.com/de5282c3ca0c/526cf06acb0d/challenge.js" defer></script> </head> <body class="flex flex-col min-h-dvh bg-white dark:bg-gray-950 text-black DocBuilderPage"> <div class="flex min-h-dvh flex-col"><div class="SVELTE_HYDRATER contents" data-target="SystemThemeMonitor" data-props="{&quot;isLoggedIn&quot;:false}"></div> <div class="SVELTE_HYDRATER contents" data-target="MainHeader" data-props="{&quot;classNames&quot;:&quot;&quot;,&quot;isWide&quot;:true,&quot;isZh&quot;:false,&quot;isPro&quot;:false}"><header class="border-b border-gray-100 "><div class="w-full px-4 flex h-16 items-center"><div class="flex flex-1 items-center"><a class="mr-5 flex flex-none items-center lg:mr-6" href="/"><img alt="Hugging Face's logo" class="w-7 md:mr-2" src="/front/assets/huggingface_logo-noborder.svg"> <span class="hidden whitespace-nowrap text-lg font-bold md:block">Hugging Face</span></a> <div class="relative flex-1 lg:max-w-sm mr-2 sm:mr-4 md:mr-3 xl:mr-6"><input autocomplete="off" class="w-full dark:bg-gray-950 pl-8 form-input-alt h-9 pr-3 focus:shadow-xl " name="" placeholder="Search models, datasets, users..." spellcheck="false" type="text" value=""> <svg class="absolute left-2.5 text-gray-400 top-1/2 transform -translate-y-1/2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M30 28.59L22.45 21A11 11 0 1 0 21 22.45L28.59 30zM5 14a9 9 0 1 1 9 9a9 9 0 0 1-9-9z" fill="currentColor"></path></svg> </div> <div class="flex flex-none items-center justify-center p-0.5 place-self-stretch lg:hidden"><button class="relative z-40 flex h-6 w-8 items-center justify-center" type="button"><svg width="1em" height="1em" viewBox="0 0 10 10" class="text-xl" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" preserveAspectRatio="xMidYMid meet" fill="currentColor"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.65039 2.9999C1.65039 2.8066 1.80709 2.6499 2.00039 2.6499H8.00039C8.19369 2.6499 8.35039 2.8066 8.35039 2.9999C8.35039 3.1932 8.19369 3.3499 8.00039 3.3499H2.00039C1.80709 3.3499 1.65039 3.1932 1.65039 2.9999ZM1.65039 4.9999C1.65039 4.8066 1.80709 4.6499 2.00039 4.6499H8.00039C8.19369 4.6499 8.35039 4.8066 8.35039 4.9999C8.35039 5.1932 8.19369 5.3499 8.00039 5.3499H2.00039C1.80709 5.3499 1.65039 5.1932 1.65039 4.9999ZM2.00039 6.6499C1.80709 6.6499 1.65039 6.8066 1.65039 6.9999C1.65039 7.1932 1.80709 7.3499 2.00039 7.3499H8.00039C8.19369 7.3499 8.35039 7.1932 8.35039 6.9999C8.35039 6.8066 8.19369 6.6499 8.00039 6.6499H2.00039Z"></path></svg> </button> </div></div> <nav aria-label="Main" class="ml-auto hidden lg:block"><ul class="flex items-center space-x-1.5 2xl:space-x-2"><li class="hover:text-indigo-700"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/models"><svg class="mr-1.5 text-gray-400 group-hover:text-indigo-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg> Models</a> </li><li class="hover:text-red-700"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/datasets"><svg class="mr-1.5 text-gray-400 group-hover:text-red-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 25 25"><ellipse cx="12.5" cy="5" fill="currentColor" fill-opacity="0.25" rx="7.5" ry="2"></ellipse><path d="M12.5 15C16.6421 15 20 14.1046 20 13V20C20 21.1046 16.6421 22 12.5 22C8.35786 22 5 21.1046 5 20V13C5 14.1046 8.35786 15 12.5 15Z" fill="currentColor" opacity="0.5"></path><path d="M12.5 7C16.6421 7 20 6.10457 20 5V11.5C20 12.6046 16.6421 13.5 12.5 13.5C8.35786 13.5 5 12.6046 5 11.5V5C5 6.10457 8.35786 7 12.5 7Z" fill="currentColor" opacity="0.5"></path><path d="M5.23628 12C5.08204 12.1598 5 12.8273 5 13C5 14.1046 8.35786 15 12.5 15C16.6421 15 20 14.1046 20 13C20 12.8273 19.918 12.1598 19.7637 12C18.9311 12.8626 15.9947 13.5 12.5 13.5C9.0053 13.5 6.06886 12.8626 5.23628 12Z" fill="currentColor"></path></svg> Datasets</a> </li><li class="hover:text-blue-700"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/spaces"><svg class="mr-1.5 text-gray-400 group-hover:text-blue-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 25 25"><path opacity=".5" d="M6.016 14.674v4.31h4.31v-4.31h-4.31ZM14.674 14.674v4.31h4.31v-4.31h-4.31ZM6.016 6.016v4.31h4.31v-4.31h-4.31Z" fill="currentColor"></path><path opacity=".75" fill-rule="evenodd" clip-rule="evenodd" d="M3 4.914C3 3.857 3.857 3 4.914 3h6.514c.884 0 1.628.6 1.848 1.414a5.171 5.171 0 0 1 7.31 7.31c.815.22 1.414.964 1.414 1.848v6.514A1.914 1.914 0 0 1 20.086 22H4.914A1.914 1.914 0 0 1 3 20.086V4.914Zm3.016 1.102v4.31h4.31v-4.31h-4.31Zm0 12.968v-4.31h4.31v4.31h-4.31Zm8.658 0v-4.31h4.31v4.31h-4.31Zm0-10.813a2.155 2.155 0 1 1 4.31 0 2.155 2.155 0 0 1-4.31 0Z" fill="currentColor"></path><path opacity=".25" d="M16.829 6.016a2.155 2.155 0 1 0 0 4.31 2.155 2.155 0 0 0 0-4.31Z" fill="currentColor"></path></svg> Spaces</a> </li><li class="hover:text-yellow-700 max-xl:hidden"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/posts"><svg class="mr-1.5 text-gray-400 group-hover:text-yellow-500 text-yellow-500!" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 12 12" preserveAspectRatio="xMidYMid meet"><path fill="currentColor" fill-rule="evenodd" d="M3.73 2.4A4.25 4.25 0 1 1 6 10.26H2.17l-.13-.02a.43.43 0 0 1-.3-.43l.01-.06a.43.43 0 0 1 .12-.22l.84-.84A4.26 4.26 0 0 1 3.73 2.4Z" clip-rule="evenodd"></path></svg> Posts</a> </li><li class="hover:text-yellow-700"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/docs"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="mr-1.5 text-gray-400 group-hover:text-yellow-500" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 16 16"><path d="m2.28 3.7-.3.16a.67.67 0 0 0-.34.58v8.73l.01.04.02.07.01.04.03.06.02.04.02.03.04.06.05.05.04.04.06.04.06.04.08.04.08.02h.05l.07.02h.11l.04-.01.07-.02.03-.01.07-.03.22-.12a5.33 5.33 0 0 1 5.15.1.67.67 0 0 0 .66 0 5.33 5.33 0 0 1 5.33 0 .67.67 0 0 0 1-.58V4.36a.67.67 0 0 0-.34-.5l-.3-.17v7.78a.63.63 0 0 1-.87.59 4.9 4.9 0 0 0-4.35.35l-.65.39a.29.29 0 0 1-.15.04.29.29 0 0 1-.16-.04l-.65-.4a4.9 4.9 0 0 0-4.34-.34.63.63 0 0 1-.87-.59V3.7Z" fill="currentColor" class="dark:opacity-40"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M8 3.1a5.99 5.99 0 0 0-5.3-.43.66.66 0 0 0-.42.62v8.18c0 .45.46.76.87.59a4.9 4.9 0 0 1 4.34.35l.65.39c.05.03.1.04.16.04.05 0 .1-.01.15-.04l.65-.4a4.9 4.9 0 0 1 4.35-.34.63.63 0 0 0 .86-.59V3.3a.67.67 0 0 0-.41-.62 5.99 5.99 0 0 0-5.3.43l-.3.17L8 3.1Zm.73 1.87a.43.43 0 1 0-.86 0v5.48a.43.43 0 0 0 .86 0V4.97Z" fill="currentColor" class="opacity-40 dark:opacity-100"></path><path d="M8.73 4.97a.43.43 0 1 0-.86 0v5.48a.43.43 0 1 0 .86 0V4.96Z" fill="currentColor" class="dark:opacity-40"></path></svg> Docs</a> </li><li class="hover:text-black dark:hover:text-white"><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/enterprise"><svg class="mr-1.5 text-gray-400 group-hover:text-black dark:group-hover:text-white" xmlns="http://www.w3.org/2000/svg" fill="none" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 33 27"><path fill="currentColor" fill-rule="evenodd" d="M13.5.7a8.7 8.7 0 0 0-7.7 5.7L1 20.6c-1 3.1.9 5.7 4.1 5.7h15c3.3 0 6.8-2.6 7.8-5.7l4.6-14.2c1-3.1-.8-5.7-4-5.7h-15Zm1.1 5.7L9.8 20.3h9.8l1-3.1h-5.8l.8-2.5h4.8l1.1-3h-4.8l.8-2.3H23l1-3h-9.5Z" clip-rule="evenodd"></path></svg> Enterprise</a> </li> <li><a class="group flex items-center px-2 py-0.5 dark:text-gray-300 dark:hover:text-gray-100" href="/pricing">Pricing </a></li> <li><div class="relative group"> <button class="px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-600 flex items-center " type="button"> <svg class=" text-gray-500 w-5 group-hover:text-gray-400 dark:text-gray-300 dark:group-hover:text-gray-100" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" viewBox="0 0 32 18" preserveAspectRatio="xMidYMid meet"><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 3.30221C14.4504 2.836 14.8284 2.45807 15.2946 2.45807H28.4933C28.9595 2.45807 29.3374 2.836 29.3374 3.30221C29.3374 3.76842 28.9595 4.14635 28.4933 4.14635H15.2946C14.8284 4.14635 14.4504 3.76842 14.4504 3.30221Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 9.00002C14.4504 8.53382 14.8284 8.15588 15.2946 8.15588H28.4933C28.9595 8.15588 29.3374 8.53382 29.3374 9.00002C29.3374 9.46623 28.9595 9.84417 28.4933 9.84417H15.2946C14.8284 9.84417 14.4504 9.46623 14.4504 9.00002Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M14.4504 14.6978C14.4504 14.2316 14.8284 13.8537 15.2946 13.8537H28.4933C28.9595 13.8537 29.3374 14.2316 29.3374 14.6978C29.3374 15.164 28.9595 15.542 28.4933 15.542H15.2946C14.8284 15.542 14.4504 15.164 14.4504 14.6978Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M1.94549 6.87377C2.27514 6.54411 2.80962 6.54411 3.13928 6.87377L6.23458 9.96907L9.32988 6.87377C9.65954 6.54411 10.194 6.54411 10.5237 6.87377C10.8533 7.20343 10.8533 7.73791 10.5237 8.06756L6.23458 12.3567L1.94549 8.06756C1.61583 7.73791 1.61583 7.20343 1.94549 6.87377Z" fill="currentColor"></path></svg> </button> </div></li> <li><hr class="h-5 w-0.5 border-none bg-gray-100 dark:bg-gray-800"></li> <li><a class="block cursor-pointer whitespace-nowrap px-2 py-0.5 hover:text-gray-500 dark:text-gray-300 dark:hover:text-gray-100" href="/login">Log In </a></li> <li><a class="whitespace-nowrap rounded-full border border-transparent bg-gray-900 px-3 py-1 leading-none text-white hover:border-black hover:bg-white hover:text-black" href="/join">Sign Up </a></li></ul></nav></div></header></div> <div class="SVELTE_HYDRATER contents" data-target="SSOBanner" data-props="{}"></div> <main class="flex flex-1 flex-col"><div class="relative lg:flex" id="hf-doc-container"><div class="sticky top-0 z-20 self-start"><div class="SVELTE_HYDRATER contents" data-target="SideMenu" data-props="{&quot;chapters&quot;:[{&quot;title&quot;:&quot;Getting started&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;TRL&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;index&quot;,&quot;url&quot;:&quot;/docs/trl/index&quot;},{&quot;title&quot;:&quot;Installation&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;installation&quot;,&quot;url&quot;:&quot;/docs/trl/installation&quot;},{&quot;title&quot;:&quot;Quickstart&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;quickstart&quot;,&quot;url&quot;:&quot;/docs/trl/quickstart&quot;}]},{&quot;title&quot;:&quot;Conceptual Guides&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;Dataset Formats&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;dataset_formats&quot;,&quot;url&quot;:&quot;/docs/trl/dataset_formats&quot;},{&quot;title&quot;:&quot;Training FAQ&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;how_to_train&quot;,&quot;url&quot;:&quot;/docs/trl/how_to_train&quot;},{&quot;title&quot;:&quot;Understanding Logs&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;logging&quot;,&quot;url&quot;:&quot;/docs/trl/logging&quot;}]},{&quot;title&quot;:&quot;How-to guides&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;Command Line Interface (CLI)&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;clis&quot;,&quot;url&quot;:&quot;/docs/trl/clis&quot;},{&quot;title&quot;:&quot;Customizing the Training&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;customization&quot;,&quot;url&quot;:&quot;/docs/trl/customization&quot;},{&quot;title&quot;:&quot;Reducing Memory Usage&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;reducing_memory_usage&quot;,&quot;url&quot;:&quot;/docs/trl/reducing_memory_usage&quot;},{&quot;title&quot;:&quot;Speeding Up Training&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;speeding_up_training&quot;,&quot;url&quot;:&quot;/docs/trl/speeding_up_training&quot;},{&quot;title&quot;:&quot;Using Trained Models&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;use_model&quot;,&quot;url&quot;:&quot;/docs/trl/use_model&quot;}]},{&quot;title&quot;:&quot;Integrations&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;DeepSpeed&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;deepspeed_integration&quot;,&quot;url&quot;:&quot;/docs/trl/deepspeed_integration&quot;},{&quot;title&quot;:&quot;Liger Kernel&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;liger_kernel_integration&quot;,&quot;url&quot;:&quot;/docs/trl/liger_kernel_integration&quot;},{&quot;title&quot;:&quot;PEFT&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;peft_integration&quot;,&quot;url&quot;:&quot;/docs/trl/peft_integration&quot;},{&quot;title&quot;:&quot;Unsloth&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;unsloth_integration&quot;,&quot;url&quot;:&quot;/docs/trl/unsloth_integration&quot;}]},{&quot;title&quot;:&quot;Examples&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;Example Overview&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;example_overview&quot;,&quot;url&quot;:&quot;/docs/trl/example_overview&quot;},{&quot;title&quot;:&quot;Community Tutorials&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;community_tutorials&quot;,&quot;url&quot;:&quot;/docs/trl/community_tutorials&quot;},{&quot;title&quot;:&quot;Sentiment Tuning&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;sentiment_tuning&quot;,&quot;url&quot;:&quot;/docs/trl/sentiment_tuning&quot;},{&quot;title&quot;:&quot;Training StackLlama&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;using_llama_models&quot;,&quot;url&quot;:&quot;/docs/trl/using_llama_models&quot;},{&quot;title&quot;:&quot;Detoxifying a Language Model&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;detoxifying_a_lm&quot;,&quot;url&quot;:&quot;/docs/trl/detoxifying_a_lm&quot;},{&quot;title&quot;:&quot;Learning to Use Tools&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;learning_tools&quot;,&quot;url&quot;:&quot;/docs/trl/learning_tools&quot;},{&quot;title&quot;:&quot;Multi Adapter RLHF&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;multi_adapter_rl&quot;,&quot;url&quot;:&quot;/docs/trl/multi_adapter_rl&quot;}]},{&quot;title&quot;:&quot;API&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;Trainers&quot;,&quot;isExpanded&quot;:true,&quot;sections&quot;:[{&quot;title&quot;:&quot;AlignProp&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;alignprop_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/alignprop_trainer&quot;},{&quot;title&quot;:&quot;BCO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;bco_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/bco_trainer&quot;},{&quot;title&quot;:&quot;CPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;cpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/cpo_trainer&quot;},{&quot;title&quot;:&quot;DDPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;ddpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/ddpo_trainer&quot;},{&quot;title&quot;:&quot;DPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;dpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/dpo_trainer&quot;},{&quot;title&quot;:&quot;Online DPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;online_dpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/online_dpo_trainer&quot;},{&quot;title&quot;:&quot;GKD&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;gkd_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/gkd_trainer&quot;},{&quot;title&quot;:&quot;GRPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;grpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/grpo_trainer&quot;},{&quot;title&quot;:&quot;KTO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;kto_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/kto_trainer&quot;},{&quot;title&quot;:&quot;Nash-MD&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;nash_md_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/nash_md_trainer&quot;},{&quot;title&quot;:&quot;ORPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;orpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/orpo_trainer&quot;},{&quot;title&quot;:&quot;PPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;ppo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/ppo_trainer&quot;},{&quot;title&quot;:&quot;PRM&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;prm_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/prm_trainer&quot;},{&quot;title&quot;:&quot;Reward&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;reward_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/reward_trainer&quot;},{&quot;title&quot;:&quot;RLOO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;rloo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/rloo_trainer&quot;},{&quot;title&quot;:&quot;SFT&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;sft_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/sft_trainer&quot;},{&quot;title&quot;:&quot;Iterative SFT&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;iterative_sft_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/iterative_sft_trainer&quot;},{&quot;title&quot;:&quot;XPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;xpo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/xpo_trainer&quot;}]},{&quot;title&quot;:&quot;Model Classes&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;models&quot;,&quot;url&quot;:&quot;/docs/trl/models&quot;},{&quot;title&quot;:&quot;Best of N Sampling&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;best_of_n&quot;,&quot;url&quot;:&quot;/docs/trl/best_of_n&quot;},{&quot;title&quot;:&quot;Judges&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;judges&quot;,&quot;url&quot;:&quot;/docs/trl/judges&quot;},{&quot;title&quot;:&quot;Callbacks&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;callbacks&quot;,&quot;url&quot;:&quot;/docs/trl/callbacks&quot;},{&quot;title&quot;:&quot;Data Utilities&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;data_utils&quot;,&quot;url&quot;:&quot;/docs/trl/data_utils&quot;},{&quot;title&quot;:&quot;Text Environments&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;text_environments&quot;,&quot;url&quot;:&quot;/docs/trl/text_environments&quot;},{&quot;title&quot;:&quot;Script Utilities&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;script_utils&quot;,&quot;url&quot;:&quot;/docs/trl/script_utils&quot;}]}],&quot;chapterId&quot;:&quot;orpo_trainer&quot;,&quot;docType&quot;:&quot;docs&quot;,&quot;isLoggedIn&quot;:false,&quot;lang&quot;:&quot;en&quot;,&quot;langs&quot;:[&quot;en&quot;],&quot;library&quot;:&quot;trl&quot;,&quot;theme&quot;:&quot;light&quot;,&quot;version&quot;:&quot;v0.15.2&quot;,&quot;versions&quot;:[{&quot;version&quot;:&quot;main&quot;},{&quot;version&quot;:&quot;v0.15.2&quot;},{&quot;version&quot;:&quot;v0.15.1&quot;},{&quot;version&quot;:&quot;v0.15.0&quot;},{&quot;version&quot;:&quot;v0.14.0&quot;},{&quot;version&quot;:&quot;v0.13.0&quot;},{&quot;version&quot;:&quot;v0.12.2&quot;},{&quot;version&quot;:&quot;v0.12.1&quot;},{&quot;version&quot;:&quot;v0.12.0&quot;},{&quot;version&quot;:&quot;v0.11.4&quot;},{&quot;version&quot;:&quot;v0.11.3&quot;},{&quot;version&quot;:&quot;v0.11.2&quot;},{&quot;version&quot;:&quot;v0.11.1&quot;},{&quot;version&quot;:&quot;v0.11.0&quot;},{&quot;version&quot;:&quot;v0.10.1&quot;},{&quot;version&quot;:&quot;v0.9.6&quot;},{&quot;version&quot;:&quot;v0.9.4&quot;},{&quot;version&quot;:&quot;v0.9.3&quot;},{&quot;version&quot;:&quot;v0.9.2&quot;},{&quot;version&quot;:&quot;v0.8.6&quot;},{&quot;version&quot;:&quot;v0.8.5&quot;},{&quot;version&quot;:&quot;v0.8.4&quot;},{&quot;version&quot;:&quot;v0.8.3&quot;},{&quot;version&quot;:&quot;v0.8.2&quot;},{&quot;version&quot;:&quot;v0.8.1&quot;},{&quot;version&quot;:&quot;v0.8.0&quot;},{&quot;version&quot;:&quot;v0.7.11&quot;},{&quot;version&quot;:&quot;v0.7.10&quot;},{&quot;version&quot;:&quot;v0.7.9&quot;},{&quot;version&quot;:&quot;v0.7.8&quot;},{&quot;version&quot;:&quot;v0.7.4&quot;},{&quot;version&quot;:&quot;v0.7.2&quot;},{&quot;version&quot;:&quot;v0.7.1&quot;},{&quot;version&quot;:&quot;v0.7.0&quot;},{&quot;version&quot;:&quot;v0.6.0&quot;},{&quot;version&quot;:&quot;v0.5.0&quot;},{&quot;version&quot;:&quot;v0.4.7&quot;},{&quot;version&quot;:&quot;v0.4.6&quot;},{&quot;version&quot;:&quot;v0.4.5&quot;},{&quot;version&quot;:&quot;v0.4.2&quot;},{&quot;version&quot;:&quot;v0.4.1&quot;},{&quot;version&quot;:&quot;v0.4.0&quot;},{&quot;version&quot;:&quot;v0.3.1&quot;},{&quot;version&quot;:&quot;v0.3.0&quot;},{&quot;version&quot;:&quot;v0.2.1&quot;},{&quot;version&quot;:&quot;v0.2.0&quot;},{&quot;version&quot;:&quot;v0.1.1&quot;}],&quot;title&quot;:&quot;ORPO Trainer&quot;}"> <div class="z-2 w-full flex-none lg:flex lg:h-dvh lg:w-[270px] lg:flex-col 2xl:w-[300px] false"><div class="shadow-alternate flex h-auto w-full items-center rounded-b-xl border-b bg-white py-2 text-lg leading-tight lg:hidden"> <div class="flex flex-1 cursor-pointer flex-col justify-center self-stretch pl-6"><p class="text-sm text-gray-400 first-letter:capitalize">TRL documentation </p> <div class="mr-2 flex items-center"><p class="font-semibold">ORPO Trainer</p> <svg class="text-xl false" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path d="M16.293 9.293L12 13.586L7.707 9.293l-1.414 1.414L12 16.414l5.707-5.707z" fill="currentColor"></path></svg></div></div> <button class="hover:shadow-alternate group ml-auto mr-6 inline-flex flex-none cursor-pointer rounded-xl border p-2"><svg class="text-gray-500 group-hover:text-gray-700" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M30 28.59L22.45 21A11 11 0 1 0 21 22.45L28.59 30zM5 14a9 9 0 1 1 9 9a9 9 0 0 1-9-9z" fill="currentColor"></path></svg></button></div> <div class="bg-linear-to-r hidden flex-col justify-between border-b border-r bg-white p-4 lg:flex from-blue-50 to-white dark:from-gray-900 dark:to-gray-950"><div class="group relative mb-2 flex min-w-[50%] items-center self-start text-lg font-bold leading-tight first-letter:capitalize"><div class="mr-1.5 h-1.5 w-1.5 rounded-full bg-blue-500 flex-none"></div> <h1>TRL</h1> <svg class="opacity-50 ml-0.5 flex-none group-hover:opacity-100" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path d="M16.293 9.293L12 13.586L7.707 9.293l-1.414 1.414L12 16.414l5.707-5.707z" fill="currentColor"></path></svg> <select class="outline-hidden absolute inset-0 border-none bg-white text-base opacity-0"><option value="/docs">🏡 View all docs</option><option value="/docs/optimum-neuron" >AWS Trainium &amp; Inferentia</option><option value="/docs/accelerate" >Accelerate</option><option value="/docs/sagemaker" >Amazon SageMaker</option><option value="https://argilla-io.github.io/argilla/" >Argilla</option><option value="/docs/autotrain" >AutoTrain</option><option value="/docs/bitsandbytes" >Bitsandbytes</option><option value="/docs/chat-ui" >Chat UI</option><option value="/docs/competitions" >Competitions</option><option value="/docs/dataset-viewer" >Dataset viewer</option><option value="/docs/datasets" >Datasets</option><option value="/docs/diffusers" >Diffusers</option><option value="https://distilabel.argilla.io/" >Distilabel</option><option value="/docs/evaluate" >Evaluate</option><option value="https://www.gradio.app/docs/" >Gradio</option><option value="/docs/hub" >Hub</option><option value="/docs/huggingface_hub" >Hub Python Library</option><option value="/docs/hugs" >Hugging Face Generative AI Services (HUGS)</option><option value="/docs/huggingface.js" >Huggingface.js</option><option value="/docs/api-inference" >Inference API (serverless)</option><option value="/docs/inference-endpoints" >Inference Endpoints (dedicated)</option><option value="/docs/leaderboards" >Leaderboards</option><option value="/docs/lighteval" >Lighteval</option><option value="/docs/optimum" >Optimum</option><option value="/docs/peft" >PEFT</option><option value="/docs/safetensors" >Safetensors</option><option value="https://sbert.net/" >Sentence Transformers</option><option value="/docs/trl" selected>TRL</option><option value="/tasks" >Tasks</option><option value="/docs/text-embeddings-inference" >Text Embeddings Inference</option><option value="/docs/text-generation-inference" >Text Generation Inference</option><option value="/docs/tokenizers" >Tokenizers</option><option value="/docs/transformers" >Transformers</option><option value="/docs/transformers.js" >Transformers.js</option><option value="/docs/smolagents" >smolagents</option><option value="/docs/timm" >timm</option></select></div> <button class="shadow-alternate mb-2 flex w-full items-center rounded-full border bg-white px-2 py-1 text-left text-sm text-gray-400 ring-indigo-200 hover:bg-indigo-50 hover:ring-2 dark:border-gray-700 dark:ring-yellow-600 dark:hover:bg-gray-900 dark:hover:text-yellow-500"><svg class="flex-none mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M30 28.59L22.45 21A11 11 0 1 0 21 22.45L28.59 30zM5 14a9 9 0 1 1 9 9a9 9 0 0 1-9-9z" fill="currentColor"></path></svg> <div>Search documentation</div> </button> <div class="flex items-center"> <select class="form-input mt-0! w-20! border! dark:text-gray-400! mr-1 rounded-sm border-gray-200 p-1 text-xs uppercase"><option value="0" >main</option><option value="1" selected>v0.15.2</option><option value="2" >v0.14.0</option><option value="3" >v0.13.0</option><option value="4" >v0.12.2</option><option value="5" >v0.11.4</option><option value="6" >v0.10.1</option><option value="7" >v0.9.6</option><option value="8" >v0.8.6</option><option value="9" >v0.7.11</option><option value="10" >v0.6.0</option><option value="11" >v0.5.0</option><option value="12" >v0.4.7</option><option value="13" >v0.3.1</option><option value="14" >v0.2.1</option><option value="15" >v0.1.1</option></select> <select class="form-input dark:text-gray-400! mr-1 rounded-sm border-gray-200 p-1 text-xs w-12! mt-0! border!"><option value="en" selected>EN</option></select> <div class="relative inline-block "> <button class="rounded-full border border-gray-100 pl-2 py-1 pr-2.5 flex items-center text-sm text-gray-500 bg-white hover:bg-yellow-50 hover:border-yellow-200 dark:hover:bg-gray-800 dark:hover:border-gray-950 dark:border-gray-800 " type="button"> <svg class=" text-yellow-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" fill="currentColor"><path d="M6.05 4.14l-.39-.39a.993.993 0 0 0-1.4 0l-.01.01a.984.984 0 0 0 0 1.4l.39.39c.39.39 1.01.39 1.4 0l.01-.01a.984.984 0 0 0 0-1.4zM3.01 10.5H1.99c-.55 0-.99.44-.99.99v.01c0 .55.44.99.99.99H3c.56.01 1-.43 1-.98v-.01c0-.56-.44-1-.99-1zm9-9.95H12c-.56 0-1 .44-1 .99v.96c0 .55.44.99.99.99H12c.56.01 1-.43 1-.98v-.97c0-.55-.44-.99-.99-.99zm7.74 3.21c-.39-.39-1.02-.39-1.41-.01l-.39.39a.984.984 0 0 0 0 1.4l.01.01c.39.39 1.02.39 1.4 0l.39-.39a.984.984 0 0 0 0-1.4zm-1.81 15.1l.39.39a.996.996 0 1 0 1.41-1.41l-.39-.39a.993.993 0 0 0-1.4 0c-.4.4-.4 1.02-.01 1.41zM20 11.49v.01c0 .55.44.99.99.99H22c.55 0 .99-.44.99-.99v-.01c0-.55-.44-.99-.99-.99h-1.01c-.55 0-.99.44-.99.99zM12 5.5c-3.31 0-6 2.69-6 6s2.69 6 6 6s6-2.69 6-6s-2.69-6-6-6zm-.01 16.95H12c.55 0 .99-.44.99-.99v-.96c0-.55-.44-.99-.99-.99h-.01c-.55 0-.99.44-.99.99v.96c0 .55.44.99.99.99zm-7.74-3.21c.39.39 1.02.39 1.41 0l.39-.39a.993.993 0 0 0 0-1.4l-.01-.01a.996.996 0 0 0-1.41 0l-.39.39c-.38.4-.38 1.02.01 1.41z"></path></svg> </button> </div> <a href="https://github.com/huggingface/trl" class="group ml-auto text-xs text-gray-500 hover:text-gray-700 hover:underline dark:hover:text-gray-300"><svg class="inline-block text-gray-500 group-hover:text-gray-700 dark:group-hover:text-gray-300 mr-1.5 -mt-1 w-4 h-4" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.03em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 250"><path d="M128.001 0C57.317 0 0 57.307 0 128.001c0 56.554 36.676 104.535 87.535 121.46c6.397 1.185 8.746-2.777 8.746-6.158c0-3.052-.12-13.135-.174-23.83c-35.61 7.742-43.124-15.103-43.124-15.103c-5.823-14.795-14.213-18.73-14.213-18.73c-11.613-7.944.876-7.78.876-7.78c12.853.902 19.621 13.19 19.621 13.19c11.417 19.568 29.945 13.911 37.249 10.64c1.149-8.272 4.466-13.92 8.127-17.116c-28.431-3.236-58.318-14.212-58.318-63.258c0-13.975 5-25.394 13.188-34.358c-1.329-3.224-5.71-16.242 1.24-33.874c0 0 10.749-3.44 35.21 13.121c10.21-2.836 21.16-4.258 32.038-4.307c10.878.049 21.837 1.47 32.066 4.307c24.431-16.56 35.165-13.12 35.165-13.12c6.967 17.63 2.584 30.65 1.255 33.873c8.207 8.964 13.173 20.383 13.173 34.358c0 49.163-29.944 59.988-58.447 63.157c4.591 3.972 8.682 11.762 8.682 23.704c0 17.126-.148 30.91-.148 35.126c0 3.407 2.304 7.398 8.792 6.14C219.37 232.5 256 184.537 256 128.002C256 57.307 198.691 0 128.001 0zm-80.06 182.34c-.282.636-1.283.827-2.194.39c-.929-.417-1.45-1.284-1.15-1.922c.276-.655 1.279-.838 2.205-.399c.93.418 1.46 1.293 1.139 1.931zm6.296 5.618c-.61.566-1.804.303-2.614-.591c-.837-.892-.994-2.086-.375-2.66c.63-.566 1.787-.301 2.626.591c.838.903 1 2.088.363 2.66zm4.32 7.188c-.785.545-2.067.034-2.86-1.104c-.784-1.138-.784-2.503.017-3.05c.795-.547 2.058-.055 2.861 1.075c.782 1.157.782 2.522-.019 3.08zm7.304 8.325c-.701.774-2.196.566-3.29-.49c-1.119-1.032-1.43-2.496-.726-3.27c.71-.776 2.213-.558 3.315.49c1.11 1.03 1.45 2.505.701 3.27zm9.442 2.81c-.31 1.003-1.75 1.459-3.199 1.033c-1.448-.439-2.395-1.613-2.103-2.626c.301-1.01 1.747-1.484 3.207-1.028c1.446.436 2.396 1.602 2.095 2.622zm10.744 1.193c.036 1.055-1.193 1.93-2.715 1.95c-1.53.034-2.769-.82-2.786-1.86c0-1.065 1.202-1.932 2.733-1.958c1.522-.03 2.768.818 2.768 1.868zm10.555-.405c.182 1.03-.875 2.088-2.387 2.37c-1.485.271-2.861-.365-3.05-1.386c-.184-1.056.893-2.114 2.376-2.387c1.514-.263 2.868.356 3.061 1.403z" fill="currentColor"></path></svg> </a></div></div> <nav class="hidden flex-auto lg:flex bottom-0 left-0 w-full flex-col overflow-y-auto border-r px-4 pb-16 pt-3 text-[0.95rem] lg:w-[270px] 2xl:w-[300px]"> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->Getting started<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/index" id="index"><!-- HTML_TAG_START -->TRL<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/installation" id="installation"><!-- HTML_TAG_START -->Installation<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/quickstart" id="quickstart"><!-- HTML_TAG_START -->Quickstart<!-- HTML_TAG_END --> </a> </div> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->Conceptual Guides<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/dataset_formats" id="dataset_formats"><!-- HTML_TAG_START -->Dataset Formats<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/how_to_train" id="how_to_train"><!-- HTML_TAG_START -->Training FAQ<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/logging" id="logging"><!-- HTML_TAG_START -->Understanding Logs<!-- HTML_TAG_END --> </a> </div> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->How-to guides<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/clis" id="clis"><!-- HTML_TAG_START -->Command Line Interface (CLI)<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/customization" id="customization"><!-- HTML_TAG_START -->Customizing the Training<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/reducing_memory_usage" id="reducing_memory_usage"><!-- HTML_TAG_START -->Reducing Memory Usage<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/speeding_up_training" id="speeding_up_training"><!-- HTML_TAG_START -->Speeding Up Training<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/use_model" id="use_model"><!-- HTML_TAG_START -->Using Trained Models<!-- HTML_TAG_END --> </a> </div> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->Integrations<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/deepspeed_integration" id="deepspeed_integration"><!-- HTML_TAG_START -->DeepSpeed<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/liger_kernel_integration" id="liger_kernel_integration"><!-- HTML_TAG_START -->Liger Kernel<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/peft_integration" id="peft_integration"><!-- HTML_TAG_START -->PEFT<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/unsloth_integration" id="unsloth_integration"><!-- HTML_TAG_START -->Unsloth<!-- HTML_TAG_END --> </a> </div> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->Examples<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/example_overview" id="example_overview"><!-- HTML_TAG_START -->Example Overview<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/community_tutorials" id="community_tutorials"><!-- HTML_TAG_START -->Community Tutorials<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/sentiment_tuning" id="sentiment_tuning"><!-- HTML_TAG_START -->Sentiment Tuning<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/using_llama_models" id="using_llama_models"><!-- HTML_TAG_START -->Training StackLlama<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/detoxifying_a_lm" id="detoxifying_a_lm"><!-- HTML_TAG_START -->Detoxifying a Language Model<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/learning_tools" id="learning_tools"><!-- HTML_TAG_START -->Learning to Use Tools<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/multi_adapter_rl" id="multi_adapter_rl"><!-- HTML_TAG_START -->Multi Adapter RLHF<!-- HTML_TAG_END --> </a> </div> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-0"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->API<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"> <div class="group flex cursor-pointer items-center pl-2 text-[0.8rem] font-semibold uppercase leading-9 hover:text-gray-700 dark:hover:text-gray-300 ml-2"><div class="flex after:absolute after:right-4 after:text-gray-500 group-hover:after:content-['▶'] after:rotate-90 after:transform"><span><span class="inline-block space-x-1 leading-5"><span><!-- HTML_TAG_START -->Trainers<!-- HTML_TAG_END --></span> </span></span> </div></div> <div class="flex flex-col"><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/alignprop_trainer" id="alignprop_trainer"><!-- HTML_TAG_START -->AlignProp<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/bco_trainer" id="bco_trainer"><!-- HTML_TAG_START -->BCO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/cpo_trainer" id="cpo_trainer"><!-- HTML_TAG_START -->CPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/ddpo_trainer" id="ddpo_trainer"><!-- HTML_TAG_START -->DDPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/dpo_trainer" id="dpo_trainer"><!-- HTML_TAG_START -->DPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/online_dpo_trainer" id="online_dpo_trainer"><!-- HTML_TAG_START -->Online DPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/gkd_trainer" id="gkd_trainer"><!-- HTML_TAG_START -->GKD<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/grpo_trainer" id="grpo_trainer"><!-- HTML_TAG_START -->GRPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/kto_trainer" id="kto_trainer"><!-- HTML_TAG_START -->KTO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/nash_md_trainer" id="nash_md_trainer"><!-- HTML_TAG_START -->Nash-MD<!-- HTML_TAG_END --> </a><a class="bg-linear-to-br rounded-xl from-black to-gray-900 py-1 pl-2 pr-2 text-white first:mt-1 last:mb-4 dark:from-gray-800 dark:to-gray-900 ml-4" href="/docs/trl/orpo_trainer" id="orpo_trainer"><!-- HTML_TAG_START -->ORPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/ppo_trainer" id="ppo_trainer"><!-- HTML_TAG_START -->PPO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/prm_trainer" id="prm_trainer"><!-- HTML_TAG_START -->PRM<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/reward_trainer" id="reward_trainer"><!-- HTML_TAG_START -->Reward<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/rloo_trainer" id="rloo_trainer"><!-- HTML_TAG_START -->RLOO<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/sft_trainer" id="sft_trainer"><!-- HTML_TAG_START -->SFT<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/iterative_sft_trainer" id="iterative_sft_trainer"><!-- HTML_TAG_START -->Iterative SFT<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-4" href="/docs/trl/xpo_trainer" id="xpo_trainer"><!-- HTML_TAG_START -->XPO<!-- HTML_TAG_END --> </a> </div><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/models" id="models"><!-- HTML_TAG_START -->Model Classes<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/best_of_n" id="best_of_n"><!-- HTML_TAG_START -->Best of N Sampling<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/judges" id="judges"><!-- HTML_TAG_START -->Judges<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/callbacks" id="callbacks"><!-- HTML_TAG_START -->Callbacks<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/data_utils" id="data_utils"><!-- HTML_TAG_START -->Data Utilities<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/text_environments" id="text_environments"><!-- HTML_TAG_START -->Text Environments<!-- HTML_TAG_END --> </a><a class="transform py-1 pl-2 pr-2 text-gray-500 first:mt-1 last:mb-4 hover:translate-x-px hover:text-black dark:hover:text-gray-300 ml-2" href="/docs/trl/script_utils" id="script_utils"><!-- HTML_TAG_START -->Script Utilities<!-- HTML_TAG_END --> </a> </div></nav></div></div></div> <div class="z-1 min-w-0 flex-1"> <div class="px-6 pt-6 md:px-12 md:pb-16 md:pt-16"><div class="max-w-4xl mx-auto mb-10"><div class="bg-linear-to-br relative overflow-hidden rounded-xl from-orange-300/10 px-4 py-5 ring-1 ring-orange-100/70 md:px-6 md:py-8"><img alt="Hugging Face's logo" class="absolute -bottom-6 -right-6 w-28 -rotate-45 md:hidden" src="/front/assets/huggingface_logo-noborder.svg"> <div class="mb-2 text-2xl font-bold dark:text-gray-200 md:mb-0">Join the Hugging Face community</div> <p class="mb-4 text-lg text-gray-400 dark:text-gray-300 md:mb-8">and get access to the augmented documentation experience </p> <div class="mb-8 hidden space-y-4 md:block xl:flex xl:space-x-6 xl:space-y-0"><div class="flex items-center"><div class="bg-linear-to-br mr-3 flex h-9 w-9 flex-none items-center justify-center rounded-lg from-indigo-100 to-indigo-100/20 dark:to-indigo-100"><svg class="text-indigo-400 group-hover:text-indigo-500" style="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg></div> <div class="text-smd leading-tight text-gray-500 dark:text-gray-300 xl:max-w-[200px] 2xl:text-base">Collaborate on models, datasets and Spaces </div></div> <div class="flex items-center"><div class="bg-linear-to-br mr-3 flex h-9 w-9 flex-none items-center justify-center rounded-lg from-orange-100 to-orange-100/20 dark:to-orange-50"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" class="text-xl text-yellow-400" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path d="M11 15H6l7-14v8h5l-7 14v-8z" fill="currentColor"></path></svg></div> <div class="text-smd leading-tight text-gray-500 dark:text-gray-300 xl:max-w-[200px] 2xl:text-base">Faster examples with accelerated inference </div></div> <div class="flex items-center"><div class="bg-linear-to-br mr-3 flex h-9 w-9 flex-none items-center justify-center rounded-lg from-gray-500/10 to-gray-500/5"><svg class="text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M14.9804 3C14.9217 3.0002 14.8631 3.00555 14.8054 3.016C11.622 3.58252 8.76073 5.30669 6.77248 7.85653C4.78422 10.4064 3.80955 13.6016 4.03612 16.8271C4.26268 20.0525 5.67447 23.0801 7.99967 25.327C10.3249 27.5738 13.3991 28.8811 16.6304 28.997C16.7944 29.003 16.9584 28.997 17.1204 28.997C19.2193 28.9984 21.2877 28.4943 23.1507 27.5274C25.0137 26.5605 26.6164 25.1592 27.8234 23.442C27.9212 23.294 27.9783 23.1229 27.9889 22.9458C27.9995 22.7687 27.9633 22.592 27.884 22.4333C27.8046 22.2747 27.6848 22.1397 27.5367 22.0421C27.3887 21.9444 27.2175 21.8875 27.0404 21.877C25.0426 21.7017 23.112 21.0693 21.3976 20.0288C19.6832 18.9884 18.231 17.5676 17.1533 15.8764C16.0756 14.1852 15.4011 12.2688 15.1822 10.2754C14.9632 8.28193 15.2055 6.26484 15.8904 4.38C15.9486 4.22913 15.97 4.06652 15.9527 3.90572C15.9354 3.74492 15.8799 3.59059 15.7909 3.45557C15.7019 3.32055 15.5819 3.20877 15.4409 3.12952C15.2999 3.05028 15.142 3.00587 14.9804 3Z" fill="currentColor"></path></svg></div> <div class="text-smd leading-tight text-gray-500 dark:text-gray-300 xl:max-w-[200px] 2xl:text-base">Switch between documentation themes </div></div></div> <div class="flex items-center space-x-2.5"><a href="/join"><button class="bg-linear-to-br shadow-xs rounded-lg bg-white from-gray-100/20 to-gray-200/60 px-5 py-1.5 font-semibold text-gray-700 ring-1 ring-gray-300/60 hover:to-gray-100/70 hover:ring-gray-300/30 active:shadow-inner">Sign Up</button></a> <p class="text-gray-500 dark:text-gray-300">to get started</p></div></div></div> <div class="prose-doc prose relative mx-auto max-w-4xl break-words"><!-- HTML_TAG_START --> <link href="/docs/trl/v0.15.2/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/entry/start.a67350a3.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/scheduler.d627b047.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/singletons.873c3aa8.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/index.a57a1c33.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/paths.b049ce47.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/entry/app.0e07cdbf.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/index.73c51727.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/nodes/0.fc3919dd.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/each.e59479a4.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/nodes/32.4c1c76e6.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/Docstring.488b7ad2.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/EditOnGithub.859b9ebc.js"> <link rel="modulepreload" href="/docs/trl/v0.15.2/en/_app/immutable/chunks/CodeBlock.b1cdc5f6.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;ORPO Trainer&quot;,&quot;local&quot;:&quot;orpo-trainer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Overview&quot;,&quot;local&quot;:&quot;overview&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Quick start&quot;,&quot;local&quot;:&quot;quick-start&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Expected dataset type&quot;,&quot;local&quot;:&quot;expected-dataset-type&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Example script&quot;,&quot;local&quot;:&quot;example-script&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usage tips&quot;,&quot;local&quot;:&quot;usage-tips&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;For Mixture of Experts Models: Enabling the auxiliary loss&quot;,&quot;local&quot;:&quot;for-mixture-of-experts-models-enabling-the-auxiliary-loss&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Logged metrics&quot;,&quot;local&quot;:&quot;logged-metrics&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ORPOTrainer&quot;,&quot;local&quot;:&quot;trl.ORPOTrainer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ORPOConfig&quot;,&quot;local&quot;:&quot;trl.ORPOConfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="orpo-trainer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#orpo-trainer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ORPO Trainer</span></h1> <p data-svelte-h="svelte-1avz9l5"><a href="https://huggingface.co/models?other=orpo,trl" rel="nofollow"><img src="https://img.shields.io/badge/All_models-ORPO-blue"></a> <a href="https://github.com/huggingface/smol-course/tree/main/2_preference_alignment" rel="nofollow"><img src="https://img.shields.io/badge/smol_course-Chapter_2-yellow"></a></p> <h2 class="relative group"><a id="overview" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#overview"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Overview</span></h2> <p data-svelte-h="svelte-lx4on6">Odds Ratio Preference Optimization (ORPO) was introduced in <a href="https://huggingface.co/papers/2403.07691" rel="nofollow">ORPO: Monolithic Preference Optimization without Reference Model</a> by <a href="https://huggingface.co/JW17" rel="nofollow">Jiwoo Hong</a>, <a href="https://huggingface.co/nlee-208" rel="nofollow">Noah Lee</a>, and <a href="https://huggingface.co/j6mes" rel="nofollow">James Thorne</a>.</p> <p data-svelte-h="svelte-vfdo9a">The abstract from the paper is the following:</p> <blockquote data-svelte-h="svelte-byw68w"><p>While recent preference alignment algorithms for language models have demonstrated promising results, supervised fine-tuning (SFT) remains imperative for achieving successful convergence. In this paper, we study the crucial role of SFT within the context of preference alignment, emphasizing that a minor penalty for the disfavored generation style is sufficient for preference-aligned SFT. Building on this foundation, we introduce a straightforward and innovative reference model-free monolithic odds ratio preference optimization algorithm, ORPO, eliminating the necessity for an additional preference alignment phase. We demonstrate, both empirically and theoretically, that the odds ratio is a sensible choice for contrasting favored and disfavored styles during SFT across the diverse sizes from 125M to 7B. Specifically, fine-tuning Phi-2 (2.7B), Llama-2 (7B), and Mistral (7B) with ORPO on the UltraFeedback alone surpasses the performance of state-of-the-art language models with more than 7B and 13B parameters: achieving up to 12.20% on AlpacaEval_{2.0} (Figure 1), 66.19% on IFEval (instruction-level loose, Table 6), and 7.32 in MT-Bench (Figure 12). We release code and model checkpoints for Mistral-ORPO-alpha (7B) and Mistral-ORPO-beta (7B).</p></blockquote> <p data-svelte-h="svelte-gby4fv">It studies the crucial role of SFT within the context of preference alignment. Using preference data the method posits that a minor penalty for the disfavored generation together with a strong adaption signal to the chosen response via a simple log odds ratio term appended to the NLL loss is sufficient for preference-aligned SFT.</p> <p data-svelte-h="svelte-14fu8cb">Thus ORPO is a reference model-free preference optimization algorithm eliminating the necessity for an additional preference alignment phase thus saving compute and memory.</p> <p data-svelte-h="svelte-1sqwqyq">The official code can be found in <a href="https://github.com/xfactlab/orpo" rel="nofollow">xfactlab/orpo</a>.</p> <p data-svelte-h="svelte-koevdu">This post-training method was contributed by <a href="https://huggingface.co/kashif" rel="nofollow">Kashif Rasul</a>, <a href="https://huggingface.co/lewtun" rel="nofollow">Lewis Tunstall</a> and <a href="https://huggingface.co/alvarobartt" rel="nofollow">Alvaro Bartolome</a>.</p> <h2 class="relative group"><a id="quick-start" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quick-start"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quick start</span></h2> <p data-svelte-h="svelte-1d01um3">This example demonstrates how to train a model using the ORPO method. We use the <a href="https://huggingface.co/Qwen/Qwen2-0.5B-Instruct" rel="nofollow">Qwen 0.5B model</a> as the base model. We use the preference data from the <a href="https://huggingface.co/datasets/openbmb/UltraFeedback" rel="nofollow">UltraFeedback dataset</a>. You can view the data in the dataset here:</p> <iframe src="https://huggingface.co/datasets/trl-lib/ultrafeedback_binarized/embed/viewer/default/train?row=0" frameborder="0" width="100%" height="560px"></iframe> <p data-svelte-h="svelte-uqytq6">Below is the script to train the model:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># train_orpo.py</span> <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> ORPOConfig, ORPOTrainer <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">&quot;Qwen/Qwen2-0.5B-Instruct&quot;</span>) tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;Qwen/Qwen2-0.5B-Instruct&quot;</span>) train_dataset = load_dataset(<span class="hljs-string">&quot;trl-lib/ultrafeedback_binarized&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>) training_args = ORPOConfig(output_dir=<span class="hljs-string">&quot;Qwen2-0.5B-ORPO&quot;</span>, logging_steps=<span class="hljs-number">10</span>) trainer = ORPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset) trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15hino8">Execute the script using the following command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch train_orpo.py<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k4c3p2">Distributed across 8 GPUs, the training takes approximately 30 minutes. You can verify the training progress by checking the reward graph. An increasing trend in the reward margin indicates that the model is improving and generating better responses over time.</p> <p data-svelte-h="svelte-s4edz4"><img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/orpo-qwen2-reward-margin.png"></p> <p data-svelte-h="svelte-1b6b955">To see how the <a href="https://huggingface.co/trl-lib/Qwen2-0.5B-ORPO" rel="nofollow">trained model</a> performs, you can use the <a href="clis#chat-interface">TRL Chat CLI</a>.</p> <pre data-svelte-h="svelte-14ixzxo"><code>$ trl chat --model_name_or_path trl-lib/Qwen2-0.5B-ORPO <strong><span style="color: red;">&lt;quentin_gallouedec&gt;:</span></strong> What is the best programming language? <strong><span style="color: blue;">&lt;trl-lib/Qwen2-0.5B-ORPO&gt;:</span></strong> It&#39;s challenging to determine the best programming language as no one language is perfect, as the complexity of a task and the type of project are significant factors. Some popular languages include Java, Python, JavaScript, and C++. If you have specific needs or requirements for a specific project, it&#39;s important to choose the language that best suits those needs. Here are some other factors to consider when choosing a programming language for a project: <strong><span style="color: green;">• Language proficiency:</span></strong> A good programming language is more likely to be easy to understand and use, and will allow developers to collaborate on projects more efficiently. <strong><span style="color: green;">• Ease of use:</span></strong> There are tools and libraries available to make programming more accessible, so developers should choose a language that can help them get started easier. <strong><span style="color: green;">• Code readability:</span></strong> A clear and concise codebase should be easy to read and understand, especially when working with large projects. <strong><span style="color: green;">• Tool and framework support:</span></strong> There are numerous libraries available for Python, Java, and JavaScript, along with tools like IDEs and static code analysis tools. <strong><span style="color: green;">• Accessibility:</span></strong> Some languages and tools have features that make them more accessible to developers with disabilities, such as support for screen readers. <strong><span style="color: green;">• Version control:</span></strong> As your projects grow and complexity increases, version control tools can be beneficial for tracking changes. </code></pre> <h2 class="relative group"><a id="expected-dataset-type" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#expected-dataset-type"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Expected dataset type</span></h2> <p data-svelte-h="svelte-aujbtv">ORPO requires a <a href="dataset_formats#preference">preference dataset</a>. The <a href="/docs/trl/v0.15.2/en/orpo_trainer#trl.ORPOTrainer">ORPOTrainer</a> supports both <a href="dataset_formats#conversational">conversational</a> and <a href="dataset_formats#standard">standard</a> dataset format. When provided with a conversational dataset, the trainer will automatically apply the chat template to the dataset.</p> <p data-svelte-h="svelte-zywaiy">Although the <a href="/docs/trl/v0.15.2/en/orpo_trainer#trl.ORPOTrainer">ORPOTrainer</a> supports both explicit and implicit prompts, we recommend using explicit prompts. If provided with an implicit prompt dataset, the trainer will automatically extract the prompt from the <code>&quot;chosen&quot;</code> and <code>&quot;rejected&quot;</code> columns. For more information, refer to the <a href="dataset_formats#preference">preference style</a> section.</p> <h2 class="relative group"><a id="example-script" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-script"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example script</span></h2> <p data-svelte-h="svelte-197aywy">We provide an example script to train a model using the ORPO method. The script is available in <a href="https://github.com/huggingface/trl/blob/main/examples/scripts/orpo.py" rel="nofollow"><code>examples/scripts/orpo.py</code></a></p> <p data-svelte-h="svelte-1clopwb">To test the ORPO script with the <a href="https://huggingface.co/Qwen/Qwen2-0.5B-Instruct" rel="nofollow">Qwen2 0.5B model</a> on the <a href="https://huggingface.co/datasets/trl-lib/ultrafeedback_binarized" rel="nofollow">UltraFeedback dataset</a>, run the following command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch examples/scripts/orpo.py \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ --dataset_name trl-lib/ultrafeedback_binarized \ --num_train_epochs 1 \ --logging_steps 25 \ --output_dir Qwen2-0.5B-ORPO<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="usage-tips" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage-tips"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage tips</span></h2> <h3 class="relative group"><a id="for-mixture-of-experts-models-enabling-the-auxiliary-loss" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#for-mixture-of-experts-models-enabling-the-auxiliary-loss"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>For Mixture of Experts Models: Enabling the auxiliary loss</span></h3> <p data-svelte-h="svelte-14px1ga">MOEs are the most efficient if the load is about equally distributed between experts.<br> To ensure that we train MOEs similarly during preference-tuning, it is beneficial to add the auxiliary loss from the load balancer to the final loss.</p> <p data-svelte-h="svelte-z2ii65">This option is enabled by setting <code>output_router_logits=True</code> in the model config (e.g. <a href="https://huggingface.co/docs/transformers/v4.49.0/en/model_doc/mixtral#transformers.MixtralConfig" rel="nofollow">MixtralConfig</a>).<br> To scale how much the auxiliary loss contributes to the total loss, use the hyperparameter <code>router_aux_loss_coef=...</code> (default: <code>0.001</code>) in the model config.</p> <h2 class="relative group"><a id="logged-metrics" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#logged-metrics"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Logged metrics</span></h2> <p data-svelte-h="svelte-132s7j9">While training and evaluating we record the following reward metrics:</p> <ul data-svelte-h="svelte-1uuduya"><li><code>rewards/chosen</code>: the mean log probabilities of the policy model for the chosen responses scaled by beta</li> <li><code>rewards/rejected</code>: the mean log probabilities of the policy model for the rejected responses scaled by beta</li> <li><code>rewards/accuracies</code>: mean of how often the chosen rewards are &gt; than the corresponding rejected rewards</li> <li><code>rewards/margins</code>: the mean difference between the chosen and corresponding rejected rewards</li> <li><code>log_odds_chosen</code>: the mean log odds ratio of the chosen responses over the rejected responses</li> <li><code>log_odds_ratio</code>: the mean of the <code>log(sigmoid(log_odds_chosen))</code></li> <li><code>nll_loss</code>: the mean negative log likelihood loss from the SFT part of the loss over chosen responses</li></ul> <h2 class="relative group"><a id="trl.ORPOTrainer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ORPOTrainer</span></h2> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer"><!-- HTML_TAG_START --><h3 class="!m-0"><span class="flex-1 break-all md:text-lg bg-gradient-to-r px-2.5 py-1.5 rounded-xl from-indigo-50/70 to-white dark:from-gray-900 dark:to-gray-950 dark:text-indigo-300 text-indigo-700"><svg class="mr-1.5 text-indigo-500 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width=".8em" height=".8em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg><span class="font-light">class</span> <span class="font-medium">trl.</span><span class="font-semibold">ORPOTrainer</span></span></h3><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L85" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60">: typing.Union[transformers.modeling_utils.PreTrainedModel, torch.nn.modules.module.Module, str, NoneType] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">args<span class="opacity-60">: typing.Optional[trl.trainer.orpo_config.ORPOConfig] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">data_collator<span class="opacity-60">: typing.Optional[transformers.data.data_collator.DataCollator] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">train_dataset<span class="opacity-60">: typing.Optional[datasets.arrow_dataset.Dataset] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_dataset<span class="opacity-60">: typing.Union[datasets.arrow_dataset.Dataset, dict[str, datasets.arrow_dataset.Dataset], NoneType] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">processing_class<span class="opacity-60">: typing.Union[transformers.tokenization_utils_base.PreTrainedTokenizerBase, transformers.image_processing_utils.BaseImageProcessor, transformers.feature_extraction_utils.FeatureExtractionMixin, transformers.processing_utils.ProcessorMixin, NoneType] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model_init<span class="opacity-60">: typing.Optional[typing.Callable[[], transformers.modeling_utils.PreTrainedModel]] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">callbacks<span class="opacity-60">: typing.Optional[list[transformers.trainer_callback.TrainerCallback]] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">optimizers<span class="opacity-60">: tuple = (None, None)</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">preprocess_logits_for_metrics<span class="opacity-60">: typing.Optional[typing.Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">peft_config<span class="opacity-60">: typing.Optional[dict] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">compute_metrics<span class="opacity-60">: typing.Optional[typing.Callable[[transformers.trainer_utils.EvalLoopOutput], dict]] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.model" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.model"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>model</strong> (<code>transformers.PreTrainedModel</code>) &#x2014; The model to train, preferably an <code>AutoModelForSequenceClassification</code>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.args" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.args"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>args</strong> (<code>ORPOConfig</code>) &#x2014; The ORPO config arguments to use for training.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.data_collator" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.data_collator"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>data_collator</strong> (<code>transformers.DataCollator</code>) &#x2014; The data collator to use for training. If None is specified, the default data collator (<code>DPODataCollatorWithPadding</code>) will be used which will pad the sequences to the maximum length of the sequences in the batch, given a dataset of paired sequences.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.train_dataset" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.train_dataset"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>train_dataset</strong> (<code>datasets.Dataset</code>) &#x2014; The dataset to use for training.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.eval_dataset" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.eval_dataset"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>eval_dataset</strong> (<code>datasets.Dataset</code>) &#x2014; The dataset to use for evaluation.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.processing_class" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.processing_class"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>processing_class</strong> (<code>PreTrainedTokenizerBase</code> or <code>BaseImageProcessor</code> or <code>FeatureExtractionMixin</code> or <code>ProcessorMixin</code>, <em>optional</em>) &#x2014; Processing class used to process the data. If provided, will be used to automatically process the inputs for the model, and it will be saved along the model to make it easier to rerun an interrupted training or reuse the fine-tuned model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.model_init" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.model_init"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>model_init</strong> (<code>Callable[[], transformers.PreTrainedModel]</code>) &#x2014; The model initializer to use for training. If None is specified, the default model initializer will be used.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.callbacks" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.callbacks"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>callbacks</strong> (<code>list[transformers.TrainerCallback]</code>) &#x2014; The callbacks to use for training.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.optimizers" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.optimizers"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>optimizers</strong> (<code>tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]</code>) &#x2014; The optimizer and scheduler to use for training.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.preprocess_logits_for_metrics" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.preprocess_logits_for_metrics"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>preprocess_logits_for_metrics</strong> (<code>Callable[[torch.Tensor, torch.Tensor], torch.Tensor]</code>) &#x2014; The function to use to preprocess the logits before computing the metrics.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.peft_config" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.peft_config"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>peft_config</strong> (<code>dict</code>, defaults to <code>None</code>) &#x2014; The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.compute_metrics" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.compute_metrics"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>compute_metrics</strong> (<code>Callable[[EvalPrediction], dict]</code>, <em>optional</em>) &#x2014; The function to use to compute the metrics. Must take a <code>EvalPrediction</code> and return a dictionary string to metric values.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-1ggwo37">Initialize ORPOTrainer.</p> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.build_tokenized_answer"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>build_tokenized_answer</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.build_tokenized_answer" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.build_tokenized_answer"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L392" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">prompt<span class="opacity-60"></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">answer<span class="opacity-60"></span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-mcz8nm">Llama tokenizer does satisfy <code>enc(a + b) = enc(a) + enc(b)</code>. It does ensure <code>enc(a + b) = enc(a) + enc(a + b)[len(enc(a)):]</code>. Reference: <a href="https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257" rel="nofollow">https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257</a></p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.concatenated_forward"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>concatenated_forward</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.concatenated_forward" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.concatenated_forward"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L729" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60">: Module</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch<span class="opacity-60">: dict</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-16i1nac">Run the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.</p> <p data-svelte-h="svelte-tgti68">We do this to avoid doing two forward passes, because it’s faster for FSDP.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.concatenated_inputs"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>concatenated_inputs</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.concatenated_inputs" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.concatenated_inputs"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L598" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch<span class="opacity-60">: dict</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">is_encoder_decoder<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">label_pad_token_id<span class="opacity-60">: int = -100</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">padding_value<span class="opacity-60">: int = 0</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">device<span class="opacity-60">: typing.Optional[torch.device] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.concatenated_inputs.batch" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.concatenated_inputs.batch"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>batch</strong> &#x2014; A batch of data. Must contain the keys &#x2018;chosen_input_ids&#x2019; and &#x2018;rejected_input_ids&#x2019;, which are tensors of shape (batch_size, sequence_length).<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.concatenated_inputs.is_encoder_decoder" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.concatenated_inputs.is_encoder_decoder"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>is_encoder_decoder</strong> &#x2014; Whether the model is an encoder-decoder model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.concatenated_inputs.label_pad_token_id" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.concatenated_inputs.label_pad_token_id"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>label_pad_token_id</strong> &#x2014; The label pad token id.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.concatenated_inputs.padding_value" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.concatenated_inputs.padding_value"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>padding_value</strong> &#x2014; The padding value to use for the concatenated inputs_ids.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.concatenated_inputs.device" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.concatenated_inputs.device"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>device</strong> &#x2014; The device for the concatenated inputs.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-1cvsgkk">Concatenate the chosen and rejected inputs into a single tensor.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.create_model_card"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>create_model_card</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.create_model_card" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.create_model_card"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L1045" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model_name<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataset_name<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tags<span class="opacity-60">: typing.Union[str, list[str], NoneType] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.create_model_card.model_name" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.create_model_card.model_name"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>model_name</strong> (<code>str</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Name of the model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.create_model_card.dataset_name" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.create_model_card.dataset_name"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>dataset_name</strong> (<code>str</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Name of the dataset used for training.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.create_model_card.tags" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.create_model_card.tags"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>tags</strong> (<code>str</code>, <code>list[str]</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Tags to be associated with the model card.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-1mh859w">Creates a draft of a model card using the information available to the <code>Trainer</code>.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.evaluation_loop"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>evaluation_loop</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.evaluation_loop" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.evaluation_loop"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L950" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader<span class="opacity-60">: DataLoader</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">description<span class="opacity-60">: str</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">prediction_loss_only<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ignore_keys<span class="opacity-60">: typing.Optional[list[str]] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">metric_key_prefix<span class="opacity-60">: str = 'eval'</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-8qlty5">Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by <code>Trainer.evaluate()</code> and <code>Trainer.predict()</code>.</p> <p data-svelte-h="svelte-1tyo99t">Works both with or without labels.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.generate_from_model"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>generate_from_model</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.generate_from_model" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.generate_from_model"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L885" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60"></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch<span class="opacity-60">: dict</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-18uwvbi">Generate samples from the model and reference model for the given batch of inputs.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.get_batch_logps"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>get_batch_logps</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.get_batch_logps" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.get_batch_logps"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L691" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logits<span class="opacity-60">: FloatTensor</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">labels<span class="opacity-60">: LongTensor</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">average_log_prob<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">label_pad_token_id<span class="opacity-60">: int = -100</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">is_encoder_decoder<span class="opacity-60">: bool = False</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.get_batch_logps.logits" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.get_batch_logps.logits"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>logits</strong> &#x2014; Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size)<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.get_batch_logps.labels" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.get_batch_logps.labels"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>labels</strong> &#x2014; Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are ignored. Shape: (batch_size, sequence_length)<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.get_batch_logps.average_log_prob" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.get_batch_logps.average_log_prob"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>average_log_prob</strong> &#x2014; If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the log probabilities of the (non-masked) tokens.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.get_batch_logps.label_pad_token_id" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.get_batch_logps.label_pad_token_id"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>label_pad_token_id</strong> &#x2014; The label pad token id.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.get_batch_logps.is_encoder_decoder" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.get_batch_logps.is_encoder_decoder"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>is_encoder_decoder</strong> &#x2014; Whether the model is an encoder-decoder model.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-4nmimv">Compute the log probabilities of the given labels under the given logits.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.get_batch_loss_metrics"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>get_batch_loss_metrics</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.get_batch_loss_metrics" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.get_batch_loss_metrics"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L810" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60"></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch<span class="opacity-60">: dict</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">train_eval<span class="opacity-60">: typing.Literal['train', 'eval'] = 'train'</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-ndgk7s">Compute the ORPO loss and other metrics for the given batch of inputs for train or test.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.log"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>log</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.log" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.log"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L1000" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logs<span class="opacity-60">: dict</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">start_time<span class="opacity-60">: typing.Optional[float] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.log.logs" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.log.logs"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>logs</strong> (<code>dict[str, float]</code>) &#x2014; The values to log.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.log.start_time" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.log.start_time"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>start_time</strong> (<code>float</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Start time of the training.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-nqksfz">Log <code>logs</code> on the various objects watching training, including stored metrics.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.odds_ratio_loss"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>odds_ratio_loss</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.odds_ratio_loss" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.odds_ratio_loss"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L660" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">policy_chosen_logps<span class="opacity-60">: FloatTensor</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">policy_rejected_logps<span class="opacity-60">: FloatTensor</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> <span class="font-bold" data-svelte-h="svelte-1j6k10o">→</span> <span class="rounded hover:bg-gray-400 cursor-pointer"><!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script><span>A tuple of three tensors</span><!-- HTML_TAG_END --></span></p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.odds_ratio_loss.policy_chosen_logps" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.odds_ratio_loss.policy_chosen_logps"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>policy_chosen_logps</strong> &#x2014; Log probabilities of the policy model for the chosen responses. Shape: (batch_size,)<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOTrainer.odds_ratio_loss.policy_rejected_logps" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOTrainer.odds_ratio_loss.policy_rejected_logps"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>policy_rejected_logps</strong> &#x2014; Log probabilities of the policy model for the rejected responses. Shape: (batch_size,)<!-- HTML_TAG_END --> </span></span> </li></ul> <div id="trl.ORPOTrainer.odds_ratio_loss.returns" class="flex items-center font-semibold space-x-3 text-base !mt-0 !mb-0 text-gray-800 rounded "><p class="text-base">Returns</p> <!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script> <p>A tuple of three tensors</p> <!-- HTML_TAG_END --> <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700"></span></div> <p class="text-base"><!-- HTML_TAG_START --><script context="module">export const metadata = 'undefined';</script> <p>(losses, chosen_rewards, rejected_rewards). The losses tensor contains the ORPO loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for the chosen and rejected responses, respectively. The log odds ratio of the chosen responses over the rejected responses ratio for logging purposes. The <code>log(sigmoid(log_odds_chosen))</code> for logging purposes.</p> <!-- HTML_TAG_END --></p> </div></div> <p data-svelte-h="svelte-y2dr0o">Compute ORPO’s odds ratio (OR) loss for a batch of policy and reference model log probabilities.</p></div> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOTrainer.tokenize_row"><!-- HTML_TAG_START --><h4 class="!m-0"><span class="flex-1 rounded-xl py-0.5 break-all bg-gradient-to-r from-blue-50/60 to-white dark:from-gray-900 dark:to-gray-950 text-blue-700 dark:text-blue-300 font-medium px-2"><svg width="1em" height="1em" viewBox="0 0 32 33" class="mr-1 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg"><path d="M5.80566 18.3545C4.90766 17.4565 4.90766 16.0005 5.80566 15.1025L14.3768 6.53142C15.2748 5.63342 16.7307 5.63342 17.6287 6.53142L26.1999 15.1025C27.0979 16.0005 27.0979 17.4565 26.1999 18.3545L17.6287 26.9256C16.7307 27.8236 15.2748 27.8236 14.3768 26.9256L5.80566 18.3545Z" fill="currentColor" fill-opacity="0.25"/><path fill-rule="evenodd" clip-rule="evenodd" d="M16.4801 13.9619C16.4801 12.9761 16.7467 12.5436 16.9443 12.3296C17.1764 12.078 17.5731 11.8517 18.2275 11.707C18.8821 11.5623 19.638 11.5342 20.4038 11.5582C20.7804 11.57 21.1341 11.5932 21.4719 11.6156L21.5263 11.6193C21.8195 11.6389 22.1626 11.6618 22.4429 11.6618V7.40825C22.3209 7.40825 22.1219 7.39596 21.7544 7.37149C21.4202 7.34925 20.9976 7.32115 20.5371 7.30672C19.6286 7.27824 18.4672 7.29779 17.3093 7.55377C16.1512 7.8098 14.8404 8.33724 13.8181 9.4452C12.7612 10.5907 12.2266 12.1236 12.2266 13.9619V15.0127H10.6836V19.2662H12.2266V26.6332H16.4801V19.2662H20.3394V15.0127H16.4801V13.9619Z" fill="currentColor"/></svg>tokenize_row</span></h4><!-- HTML_TAG_END --> <a id="trl.ORPOTrainer.tokenize_row" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOTrainer.tokenize_row"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_trainer.py#L442" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">feature<span class="opacity-60"></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model<span class="opacity-60">: typing.Union[transformers.modeling_utils.PreTrainedModel, torch.nn.modules.module.Module, NoneType] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> </div></div> <p data-svelte-h="svelte-xlsvih">Tokenize a single row from a ORPO specific dataset.</p> <p data-svelte-h="svelte-1mn1gnw">At this stage, we don’t convert to PyTorch tensors yet; we just handle the truncation in case the prompt + chosen or prompt + rejected responses is/are too long. First we truncate the prompt; if we’re still too long, we truncate the chosen/rejected.</p> <p data-svelte-h="svelte-1xvpyih">We also create the labels for the chosen/rejected responses, which are of length equal to the sum of the length of the prompt and the chosen/rejected response, with label_pad_token_id for the prompt tokens.</p></div></div> <h2 class="relative group"><a id="trl.ORPOConfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ORPOConfig</span></h2> <div class="docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"> <div><span class="group flex space-x-1.5 items-center text-gray-800 bg-gradient-to-r rounded-tr-lg -mt-4 -ml-4 pt-3 px-2.5" id="trl.ORPOConfig"><!-- HTML_TAG_START --><h3 class="!m-0"><span class="flex-1 break-all md:text-lg bg-gradient-to-r px-2.5 py-1.5 rounded-xl from-indigo-50/70 to-white dark:from-gray-900 dark:to-gray-950 dark:text-indigo-300 text-indigo-700"><svg class="mr-1.5 text-indigo-500 inline-block -mt-0.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width=".8em" height=".8em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg><span class="font-light">class</span> <span class="font-medium">trl.</span><span class="font-semibold">ORPOConfig</span></span></h3><!-- HTML_TAG_END --> <a id="trl.ORPOConfig" class="header-link invisible with-hover:group-hover:visible pr-2" href="#trl.ORPOConfig"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></a> <a class="!ml-auto !text-gray-400 !no-underline text-sm flex items-center" href="https://github.com/huggingface/trl/blob/v0.15.2/trl/trainer/orpo_config.py#L21" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span class="hidden md:block mx-0.5 hover:!underline" data-svelte-h="svelte-122apf4">source</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span></a></span> <p class="font-mono text-xs md:text-sm !leading-relaxed !my-6"><span data-svelte-h="svelte-8mvn6a">(</span> <span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">output_dir<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">overwrite_output_dir<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">do_train<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">do_eval<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">do_predict<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_strategy<span class="opacity-60">: typing.Union[transformers.trainer_utils.IntervalStrategy, str] = 'no'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">prediction_loss_only<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">per_device_train_batch_size<span class="opacity-60">: int = 8</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">per_device_eval_batch_size<span class="opacity-60">: int = 8</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">per_gpu_train_batch_size<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">per_gpu_eval_batch_size<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">gradient_accumulation_steps<span class="opacity-60">: int = 1</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_accumulation_steps<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_delay<span class="opacity-60">: typing.Optional[float] = 0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">torch_empty_cache_steps<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">learning_rate<span class="opacity-60">: float = 1e-06</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">weight_decay<span class="opacity-60">: float = 0.0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">adam_beta1<span class="opacity-60">: float = 0.9</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">adam_beta2<span class="opacity-60">: float = 0.999</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">adam_epsilon<span class="opacity-60">: float = 1e-08</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_grad_norm<span class="opacity-60">: float = 1.0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">num_train_epochs<span class="opacity-60">: float = 3.0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_steps<span class="opacity-60">: int = -1</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">lr_scheduler_type<span class="opacity-60">: typing.Union[transformers.trainer_utils.SchedulerType, str] = 'linear'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">lr_scheduler_kwargs<span class="opacity-60">: typing.Union[dict, str, NoneType] = &lt;factory></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">warmup_ratio<span class="opacity-60">: float = 0.0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">warmup_steps<span class="opacity-60">: int = 0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">log_level<span class="opacity-60">: typing.Optional[str] = 'passive'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">log_level_replica<span class="opacity-60">: typing.Optional[str] = 'warning'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">log_on_each_node<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logging_dir<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logging_strategy<span class="opacity-60">: typing.Union[transformers.trainer_utils.IntervalStrategy, str] = 'steps'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logging_first_step<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logging_steps<span class="opacity-60">: float = 500</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">logging_nan_inf_filter<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_strategy<span class="opacity-60">: typing.Union[transformers.trainer_utils.SaveStrategy, str] = 'steps'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_steps<span class="opacity-60">: float = 500</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_total_limit<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_safetensors<span class="opacity-60">: typing.Optional[bool] = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_on_each_node<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">save_only_model<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">restore_callback_states_from_checkpoint<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">no_cuda<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_cpu<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_mps_device<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">seed<span class="opacity-60">: int = 42</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">data_seed<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">jit_mode_eval<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_ipex<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bf16<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fp16<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fp16_opt_level<span class="opacity-60">: str = 'O1'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">half_precision_backend<span class="opacity-60">: str = 'auto'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">bf16_full_eval<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fp16_full_eval<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tf32<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">local_rank<span class="opacity-60">: int = -1</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ddp_backend<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tpu_num_cores<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tpu_metrics_debug<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">debug<span class="opacity-60">: typing.Union[str, typing.List[transformers.debug_utils.DebugOption]] = ''</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader_drop_last<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_steps<span class="opacity-60">: typing.Optional[float] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader_num_workers<span class="opacity-60">: int = 0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader_prefetch_factor<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">past_index<span class="opacity-60">: int = -1</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">run_name<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">disable_tqdm<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">remove_unused_columns<span class="opacity-60">: typing.Optional[bool] = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">label_names<span class="opacity-60">: typing.Optional[typing.List[str]] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">load_best_model_at_end<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">metric_for_best_model<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">greater_is_better<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ignore_data_skip<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fsdp<span class="opacity-60">: typing.Union[typing.List[transformers.trainer_utils.FSDPOption], str, NoneType] = ''</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fsdp_min_num_params<span class="opacity-60">: int = 0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fsdp_config<span class="opacity-60">: typing.Union[dict, str, NoneType] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">tp_size<span class="opacity-60">: typing.Optional[int] = 0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fsdp_transformer_layer_cls_to_wrap<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">accelerator_config<span class="opacity-60">: typing.Union[dict, str, NoneType] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">deepspeed<span class="opacity-60">: typing.Union[dict, str, NoneType] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">label_smoothing_factor<span class="opacity-60">: float = 0.0</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">optim<span class="opacity-60">: typing.Union[transformers.training_args.OptimizerNames, str] = 'adamw_torch'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">optim_args<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">adafactor<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">group_by_length<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">length_column_name<span class="opacity-60">: typing.Optional[str] = 'length'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">report_to<span class="opacity-60">: typing.Union[NoneType, str, typing.List[str]] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ddp_find_unused_parameters<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ddp_bucket_cap_mb<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ddp_broadcast_buffers<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader_pin_memory<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataloader_persistent_workers<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">skip_memory_metrics<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_legacy_prediction_loop<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">push_to_hub<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">resume_from_checkpoint<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">hub_model_id<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">hub_strategy<span class="opacity-60">: typing.Union[transformers.trainer_utils.HubStrategy, str] = 'every_save'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">hub_token<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">hub_private_repo<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">hub_always_push<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">gradient_checkpointing<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">gradient_checkpointing_kwargs<span class="opacity-60">: typing.Union[dict, str, NoneType] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">include_inputs_for_metrics<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">include_for_metrics<span class="opacity-60">: typing.List[str] = &lt;factory></span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_do_concat_batches<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">fp16_backend<span class="opacity-60">: str = 'auto'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">evaluation_strategy<span class="opacity-60">: typing.Union[transformers.trainer_utils.IntervalStrategy, str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">push_to_hub_model_id<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">push_to_hub_organization<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">push_to_hub_token<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">mp_parameters<span class="opacity-60">: str = ''</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">auto_find_batch_size<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">full_determinism<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">torchdynamo<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ray_scope<span class="opacity-60">: typing.Optional[str] = 'last'</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">ddp_timeout<span class="opacity-60">: typing.Optional[int] = 1800</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">torch_compile<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">torch_compile_backend<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">torch_compile_mode<span class="opacity-60">: typing.Optional[str] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dispatch_batches<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">split_batches<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">include_tokens_per_second<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">include_num_input_tokens_seen<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">neftune_noise_alpha<span class="opacity-60">: typing.Optional[float] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">optim_target_modules<span class="opacity-60">: typing.Union[NoneType, str, typing.List[str]] = None</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">batch_eval_metrics<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_on_start<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">use_liger_kernel<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">eval_use_gather_object<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-default"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">average_tokens_across_devices<span class="opacity-60">: typing.Optional[bool] = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_length<span class="opacity-60">: typing.Optional[int] = 1024</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_prompt_length<span class="opacity-60">: typing.Optional[int] = 512</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">max_completion_length<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">beta<span class="opacity-60">: float = 0.1</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">disable_dropout<span class="opacity-60">: bool = True</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">label_pad_token_id<span class="opacity-60">: int = -100</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">padding_value<span class="opacity-60">: typing.Optional[int] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">truncation_mode<span class="opacity-60">: str = 'keep_end'</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">generate_during_eval<span class="opacity-60">: bool = False</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">is_encoder_decoder<span class="opacity-60">: typing.Optional[bool] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">model_init_kwargs<span class="opacity-60">: typing.Optional[dict[str, typing.Any]] = None</span></span> </span><span class="comma cursor-pointer"><span class="rounded hover:bg-black hover:text-white dark:hover:bg-white dark:hover:text-black">dataset_num_proc<span class="opacity-60">: typing.Optional[int] = None</span></span> </span> <span data-svelte-h="svelte-1jq0pl7">)</span> </p> <div class="!mb-10 relative docstring-details "> <p class="flex items-center font-semibold !mt-2 !mb-2 text-gray-800" data-svelte-h="svelte-lt6pb6">Parameters <span class="flex-auto border-t-2 border-gray-100 dark:border-gray-700 ml-3"></span></p> <ul class="px-2"><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.learning_rate" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.learning_rate"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>learning_rate</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1e-6</code>) &#x2014; Initial learning rate for <code>AdamW</code> optimizer. The default value replaces that of <a href="https://huggingface.co/docs/transformers/v4.49.0/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.max_length" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.max_length"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>max_length</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>1024</code>) &#x2014; Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want to use the default data collator.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.max_prompt_length" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.max_prompt_length"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>max_prompt_length</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>512</code>) &#x2014; Maximum length of the prompt. This argument is required if you want to use the default data collator.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.max_completion_length" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.max_completion_length"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>max_completion_length</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Maximum length of the completion. This argument is required if you want to use the default data collator and your model is an encoder-decoder.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.beta" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.beta"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>beta</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.1</code>) &#x2014; Parameter controlling the relative ratio loss weight in the ORPO loss. In the <a href="https://huggingface.co/papers/2403.07691" rel="nofollow">paper</a>, it is denoted by &#x3BB;. In the <a href="https://github.com/xfactlab/orpo" rel="nofollow">code</a>, it is denoted by <code>alpha</code>.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.disable_dropout" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.disable_dropout"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>disable_dropout</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014; Whether to disable dropout in the model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.label_pad_token_id" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.label_pad_token_id"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>label_pad_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to <code>-100</code>) &#x2014; Label pad token id. This argument is required if you want to use the default data collator.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.padding_value" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.padding_value"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>padding_value</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Padding value to use. If <code>None</code>, the padding value of the tokenizer is used.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.truncation_mode" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.truncation_mode"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>truncation_mode</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;keep_end&quot;</code>) &#x2014; Truncation mode to use when the prompt is too long. Possible values are <code>&quot;keep_end&quot;</code> or <code>&quot;keep_start&quot;</code>. This argument is required if you want to use the default data collator.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.generate_during_eval" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.generate_during_eval"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>generate_during_eval</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014; If <code>True</code>, generates and logs completions from the model to W&amp;B or Comet during evaluation.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.is_encoder_decoder" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.is_encoder_decoder"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>is_encoder_decoder</strong> (<code>bool</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; When using the <code>model_init</code> argument (callable) to instantiate the model instead of the <code>model</code> argument, you need to specify if the model returned by the callable is an encoder-decoder model.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.model_init_kwargs" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.model_init_kwargs"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>model_init_kwargs</strong> (<code>dict[str, Any]</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Keyword arguments to pass to <code>AutoModelForCausalLM.from_pretrained</code> when instantiating the model from a string.<!-- HTML_TAG_END --> </span></span> </li><li class="text-base !pl-4 my-3 rounded "><span class="group flex space-x-1.5 items-start"><a id="trl.ORPOConfig.dataset_num_proc" class="header-link block pr-0.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl.ORPOConfig.dataset_num_proc"><span><svg class="text-smd" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span><!-- HTML_TAG_START --><strong>dataset_num_proc</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014; Number of processes to use for processing the dataset.<!-- HTML_TAG_END --> </span></span> </li></ul> </div></div> <p data-svelte-h="svelte-1pb1aa1">Configuration class for the <a href="/docs/trl/v0.15.2/en/orpo_trainer#trl.ORPOTrainer">ORPOTrainer</a>.</p> <p data-svelte-h="svelte-2adg1n">Using <a href="https://huggingface.co/docs/transformers/v4.49.0/en/internal/trainer_utils#transformers.HfArgumentParser" rel="nofollow">HfArgumentParser</a> we can turn this class into <a href="https://docs.python.org/3/library/argparse#module-argparse" rel="nofollow">argparse</a> arguments that can be specified on the command line.</p></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/trl/blob/main/docs/source/orpo_trainer.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> <script> { __sveltekit_wrknss = { assets: "/docs/trl/v0.15.2/en", base: "/docs/trl/v0.15.2/en", env: {} }; const element = document.currentScript.parentElement; const data = [null,null]; Promise.all([ import("/docs/trl/v0.15.2/en/_app/immutable/entry/start.a67350a3.js"), import("/docs/trl/v0.15.2/en/_app/immutable/entry/app.0e07cdbf.js") ]).then(([kit, app]) => { kit.start(app, element, { node_ids: [0, 32], data, form: null, error: null }); }); } </script> <!-- HTML_TAG_END --></div> <div class="SVELTE_HYDRATER contents" data-target="DocFooterNav" data-props="{&quot;classNames&quot;:&quot;mx-auto mt-16 flex max-w-4xl items-center pb-8 font-sans font-medium leading-6 xl:mt-32&quot;,&quot;chapterPrev&quot;:{&quot;title&quot;:&quot;Nash-MD&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;nash_md_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/nash_md_trainer&quot;},&quot;chapterNext&quot;:{&quot;title&quot;:&quot;PPO&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;ppo_trainer&quot;,&quot;url&quot;:&quot;/docs/trl/ppo_trainer&quot;},&quot;isCourse&quot;:false,&quot;isLoggedIn&quot;:false}"><div class="mx-auto mt-16 flex max-w-4xl items-center pb-8 font-sans font-medium leading-6 xl:mt-32"><a href="/docs/trl/nash_md_trainer" class="mr-8 flex transform items-center text-gray-600 transition-all hover:-translate-x-px hover:text-gray-900 dark:hover:text-gray-300"><span class="mr-2 translate-y-px">←</span>Nash-MD</a> <a href="/docs/trl/ppo_trainer" class="ml-auto flex transform items-center text-right text-gray-600 transition-all hover:translate-x-px hover:text-gray-900 dark:hover:text-gray-300">PPO<span class="ml-2 translate-y-px">→</span></a></div></div></div></div> <div class="sticky top-0 self-start"><div class="SVELTE_HYDRATER contents" data-target="SubSideMenu" data-props="{&quot;chapter&quot;:{&quot;title&quot;:&quot;ORPO Trainer&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;orpo-trainer&quot;,&quot;url&quot;:&quot;#orpo-trainer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Overview&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;overview&quot;,&quot;url&quot;:&quot;#overview&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;Quick start&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;quick-start&quot;,&quot;url&quot;:&quot;#quick-start&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;Expected dataset type&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;expected-dataset-type&quot;,&quot;url&quot;:&quot;#expected-dataset-type&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;Example script&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;example-script&quot;,&quot;url&quot;:&quot;#example-script&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;Usage tips&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;usage-tips&quot;,&quot;url&quot;:&quot;#usage-tips&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;For Mixture of Experts Models: Enabling the auxiliary loss&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;for-mixture-of-experts-models-enabling-the-auxiliary-loss&quot;,&quot;url&quot;:&quot;#for-mixture-of-experts-models-enabling-the-auxiliary-loss&quot;,&quot;sections&quot;:[]}]},{&quot;title&quot;:&quot;Logged metrics&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;logged-metrics&quot;,&quot;url&quot;:&quot;#logged-metrics&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;ORPOTrainer&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;trl.ORPOTrainer&quot;,&quot;url&quot;:&quot;#trl.ORPOTrainer&quot;,&quot;sections&quot;:[]},{&quot;title&quot;:&quot;ORPOConfig&quot;,&quot;isExpanded&quot;:true,&quot;id&quot;:&quot;trl.ORPOConfig&quot;,&quot;url&quot;:&quot;#trl.ORPOConfig&quot;,&quot;sections&quot;:[]}]}}"> <nav class="hidden h-dvh w-[270px] flex-none flex-col space-y-3 overflow-y-auto break-words border-l pb-16 pl-6 pr-10 pt-24 text-sm lg:flex 2xl:w-[305px]"> <a href="#orpo-trainer" class=" text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-orpo-trainer"><!-- HTML_TAG_START -->ORP<wbr>O <wbr>Trainer<!-- HTML_TAG_END --></a> <a href="#overview" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-overview"><!-- HTML_TAG_START --><wbr>Overview<!-- HTML_TAG_END --></a> <a href="#quick-start" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-quick-start"><!-- HTML_TAG_START --><wbr>Quick start<!-- HTML_TAG_END --></a> <a href="#expected-dataset-type" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-expected-dataset-type"><!-- HTML_TAG_START --><wbr>Expected dataset type<!-- HTML_TAG_END --></a> <a href="#example-script" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-example-script"><!-- HTML_TAG_START --><wbr>Example script<!-- HTML_TAG_END --></a> <a href="#usage-tips" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-usage-tips"><!-- HTML_TAG_START --><wbr>Usage tips<!-- HTML_TAG_END --></a> <a href="#for-mixture-of-experts-models-enabling-the-auxiliary-loss" class="pl-8 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-for-mixture-of-experts-models-enabling-the-auxiliary-loss"><!-- HTML_TAG_START --><wbr>For <wbr>Mixture of <wbr>Experts <wbr>Models: <wbr>Enabling the auxiliary loss<!-- HTML_TAG_END --></a> <a href="#logged-metrics" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-logged-metrics"><!-- HTML_TAG_START --><wbr>Logged metrics<!-- HTML_TAG_END --></a> <a href="#trl.ORPOTrainer" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-trl.ORPOTrainer"><!-- HTML_TAG_START -->ORPO<wbr>Trainer<!-- HTML_TAG_END --></a> <a href="#trl.ORPOConfig" class="pl-4 text-gray-400 transform hover:translate-x-px hover:text-gray-700 dark:hover:text-gray-300" id="nav-trl.ORPOConfig"><!-- HTML_TAG_START -->ORPO<wbr>Config<!-- HTML_TAG_END --></a> </nav></div></div></div> <div id="doc-footer"></div></main> </div> <script> import("\/front\/build\/kube-9063520\/index.js"); window.moonSha = "kube-9063520\/"; window.__hf_deferred = {}; </script> <!-- Stripe --> <script> if (["hf.co", "huggingface.co"].includes(window.location.hostname)) { const script = document.createElement("script"); script.src = "https://js.stripe.com/v3/"; script.async = true; document.head.appendChild(script); } </script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10