CINXE.COM
Yizeng Han | Papers With Code
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Page-level constants; the GTAG_* values are read by the analytics bootstrap script below. -->
<script>
  const GTAG_ENABLED = true;
  const GTAG_TRACKING_ID = "UA-121182717-1";
  const SENTRY_DSN_FRONTEND = "".trim();
  const GLOBAL_CSRF_TOKEN = 'EasKwbFlIOI6byL50mbN2tH22oaHIdeUINlMRHojs4ZT4CKoD6v5BkM0XEqTJIw5';
  const MEDIA_URL = "https://production-media.paperswithcode.com/";
  const ASSETS_URL = "https://production-assets.paperswithcode.com";
  run_after_frontend_loaded = window.run_after_frontend_loaded || [];
</script>
<!-- Resource hints for the asset origin, plus preloads of the three woff2 files used by the inline Lato @font-face rules. -->
<link rel="preconnect" href="https://production-assets.paperswithcode.com">
<link rel="dns-prefetch" href="https://production-assets.paperswithcode.com">
<link rel="preload" as="font" type="font/woff2" href="https://production-assets.paperswithcode.com/perf/fonts/65e877e527022735c1a1.woff2" crossorigin>
<link rel="preload" as="font" type="font/woff2" href="https://production-assets.paperswithcode.com/perf/fonts/917632e36982ca7933c8.woff2" crossorigin>
<link rel="preload" as="font" type="font/woff2" href="https://production-assets.paperswithcode.com/perf/fonts/f1405bd8a987c2ea8a67.woff2" crossorigin>
<script>
  // Analytics bootstrap.
  // Disabled: window.captureOutboundLink simply navigates to the URL it is given.
  // Enabled: injects the gtag.js loader, initialises window.dataLayer, exposes
  // window.gtag, queues the "js" and "config" commands, and makes
  // window.captureOutboundLink record an "outbound" click event for the URL.
  (() => {
    if (!GTAG_ENABLED) {
      window.captureOutboundLink = function (url) {
        document.location = url;
      };
      return;
    }

    const loader = document.createElement("script");

    // Every gtag command is pushed onto dataLayer as the raw arguments object.
    function gtag() {
      window.dataLayer.push(arguments);
    }

    loader.src = `https://www.googletagmanager.com/gtag/js?id=${GTAG_TRACKING_ID}`;
    document.head.appendChild(loader);
    window.dataLayer = window.dataLayer || [];
    window.gtag = gtag;
    gtag("js", new Date());
    gtag("config", GTAG_TRACKING_ID);
    window.captureOutboundLink = function (url) {
      gtag("event", "click", { event_category: "outbound", event_label: url });
    };
  })();
</script>
<style>:root{--bs-blue: #0d6efd;--bs-indigo: #6610f2;--bs-purple: #6f42c1;--bs-pink: #d63384;--bs-red: #dc3545;--bs-orange: #fd7e14;--bs-yellow: #ffc107;--bs-green: #198754;--bs-teal: #20c997;--bs-cyan: #21cbce;--bs-white: #fff;--bs-gray: #6c757d;--bs-gray-dark: #343a40;--bs-primary: #0d6efd;--bs-secondary: 
#6c757d;--bs-success: #198754;--bs-info: #21cbce;--bs-warning: #ffc107;--bs-danger: #dc3545;--bs-light: #f8f9fa;--bs-dark: #212529;--bs-font-sans-serif: system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", "Liberation Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";--bs-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;--bs-gradient: linear-gradient(180deg, rgba(255, 255, 255, 0.15), rgba(255, 255, 255, 0))}@font-face{font-family:"Lato";font-style:normal;font-weight:300;font-display:swap;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/917632e36982ca7933c8.woff2) format("woff2")}@font-face{font-family:"Lato";font-style:normal;font-weight:400;font-display:swap;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/65e877e527022735c1a1.woff2) format("woff2")}@font-face{font-family:"Lato";font-style:normal;font-weight:700;font-display:swap;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/f1405bd8a987c2ea8a67.woff2) format("woff2")}@font-face{font-family:"Computer Modern Serif";src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/b63de31899ec71cfb870.woff) format("woff");font-display:swap;font-weight:normal;font-style:normal}@font-face{font-family:"Computer Modern Serif";src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/5d5c7512cb539fb279b2.woff) format("woff");font-display:swap;font-weight:bold;font-style:normal}@font-face{font-family:"Computer Modern Serif";src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/357ce3503c6299bc1b58.woff) format("woff");font-display:swap;font-weight:normal;font-style:italic}@font-face{font-family:"Computer Modern Serif";src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/c83e6f15d4c7568ee872.woff) 
format("woff");font-display:swap;font-weight:bold;font-style:italic}@font-face{font-family:"Exo";font-style:normal;font-weight:100;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/729c812ee9989426abb1.woff2) format("woff2");font-display:swap}@font-face{font-family:"Nunito";font-style:normal;font-weight:400;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/39a18f443d434999b89b.woff2) format("woff2");font-display:swap}@font-face{font-family:"Nunito";font-style:normal;font-weight:700;src:local(""),url(https://production-assets.paperswithcode.com/perf/fonts/4ad349571e28bb59c5a5.woff2) format("woff2");font-display:swap}*,*::before,*::after{box-sizing:border-box}@media(prefers-reduced-motion: no-preference){:root{scroll-behavior:smooth}}body{margin:0;font-family:system-ui,-apple-system,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";font-size:1rem;font-weight:400;line-height:1.5;color:#212529;background-color:#fff;-webkit-text-size-adjust:100%;-webkit-tap-highlight-color:rgba(0,0,0,0)}[tabindex="-1"]:focus:not(:focus-visible){outline:0 !important}hr{margin:1rem 0;color:#000;background-color:currentColor;border:0;opacity:.1}hr:not([size]){height:1px}h6,h5,h4,h3,h2,h1{margin-top:0;margin-bottom:.5rem;font-weight:500;line-height:1.2}h1{font-size:calc(1.375rem + 1.5vw)}@media(min-width: 1200px){h1{font-size:2.5rem}}h2{font-size:calc(1.325rem + 0.9vw)}@media(min-width: 1200px){h2{font-size:2rem}}h3{font-size:calc(1.3rem + 0.6vw)}@media(min-width: 1200px){h3{font-size:1.75rem}}h4{font-size:calc(1.275rem + 0.3vw)}@media(min-width: 1200px){h4{font-size:1.5rem}}h5{font-size:1.25rem}h6{font-size:1rem}p{margin-top:0;margin-bottom:1rem}ol,ul{padding-left:2rem}ol,ul{margin-top:0;margin-bottom:1rem}ul 
ul{margin-bottom:0}b,strong{font-weight:bolder}small{font-size:0.875em}a{color:#0d6efd;text-decoration:none}a:hover{color:#0a58ca;text-decoration:none}a:not([href]):not([class]),a:not([href]):not([class]):hover{color:inherit;text-decoration:none}pre,code{font-family:var(--bs-font-monospace);font-size:1em;direction:ltr /* rtl:ignore */;unicode-bidi:bidi-override}pre{display:block;margin-top:0;margin-bottom:1rem;overflow:auto;font-size:0.875em}code{font-size:0.875em;color:#d63384;word-wrap:break-word}a>code{color:inherit}figure{margin:0 0 1rem}img,svg{vertical-align:middle}table{caption-side:bottom;border-collapse:collapse}th{text-align:inherit;text-align:-webkit-match-parent}thead,tbody,tfoot,tr,td,th{border-color:inherit;border-style:solid;border-width:0}label{display:inline-block}button{border-radius:0}button:focus:not(:focus-visible){outline:0}input,button,select,optgroup,textarea{margin:0;font-family:inherit;font-size:inherit;line-height:inherit}button,select{text-transform:none}[role=button]{cursor:pointer}select{word-wrap:normal}[list]::-webkit-calendar-picker-indicator{display:none}button,[type=button],[type=reset],[type=submit]{-webkit-appearance:button}button:not(:disabled),[type=button]:not(:disabled),[type=reset]:not(:disabled),[type=submit]:not(:disabled){cursor:pointer}::-moz-focus-inner{padding:0;border-style:none}textarea{resize:vertical}::-webkit-datetime-edit-fields-wrapper,::-webkit-datetime-edit-text,::-webkit-datetime-edit-minute,::-webkit-datetime-edit-hour-field,::-webkit-datetime-edit-day-field,::-webkit-datetime-edit-month-field,::-webkit-datetime-edit-year-field{padding:0}::-webkit-inner-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:textfield}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-color-swatch-wrapper{padding:0}::file-selector-button{font:inherit}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}iframe{border:0}[hidden]{display:none 
!important}.list-unstyled{padding-left:0;list-style:none}small,.small{font-size:0.875em;font-weight:400}.footer{display:block;margin-top:30px;padding:15px;border-top:1px solid #e0e0e0;font-size:13px;color:#aaa;text-align:center}.footer a{color:#999}.footer-contact{margin-bottom:5px}.footer-contact-item{display:inline-block}.footer-links>*:not(:last-child){margin-right:1rem}.icon-wrapper{display:inline-block;width:1em;height:1em;contain:strict;fill:currentcolor;box-sizing:content-box !important}.icon-wrapper.icon-fa{position:relative;top:2px}.icon-wrapper svg{display:block;height:100%;width:100%}.icon-wrapper[data-name=slack] svg{width:200%;height:200%;transform:translate(-25%, -25%)}.icon-wrapper:not(.icon-color)>svg>*{stroke:currentColor}.navbar-brand .icon-wrapper{color:#21cbce;width:30px;height:30px;vertical-align:middle}.navbar-mobile-twitter{margin-right:18px !important;padding-top:1px}.navbar-mobile-twitter a{color:#1d9bf0}.navbar-mobile-twitter .icon-wrapper{width:23px;height:23px}.header-search{margin-bottom:26px}.header-search form{position:relative}.header-search .icon{color:gray;position:absolute !important;top:50% !important;left:initial !important;padding-right:0 !important;transform:translateY(-50%);right:22px;padding:0;height:20px;width:20px}.header-search .icon .icon-wrapper{width:100%;height:100%;top:0}.nav-link-social-icon{color:#1d9bf0;width:25px;height:25px}.nav-link-social-icon-slack{vertical-align:middle}@media(min-width: 992px){.header-search{margin:0}.header-search .icon{right:10px}.nav-link-social-icon{width:20px;height:20px}.nav-link-social-icon-slack{width:22px;height:22px}} </style><link href="https://production-assets.paperswithcode.com/static/css/13.a0e289cc.chunk.css" rel="stylesheet"><link href="https://production-assets.paperswithcode.com/static/css/main.cd7ec85b.chunk.css" rel="stylesheet"> <!-- Metadata --> <title>Yizeng Han | Papers With Code</title> <meta name="description" content="Papers by Yizeng Han with links to code and 
results." />
<!-- Open Graph protocol metadata -->
<meta property="og:title" content="Papers with Code - Yizeng Han">
<meta property="og:description" content="Papers by Yizeng Han with links to code and results.">
<meta property="og:image" content="https://paperswithcode.com/static/index.jpeg">
<meta property="og:url" content="https://paperswithcode.com/search?q=author%3AYizeng+Han">
<!-- Twitter metadata -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@paperswithcode">
<meta name="twitter:title" content="Papers with Code - Yizeng Han">
<meta name="twitter:description" content="Papers by Yizeng Han with links to code and results.">
<meta name="twitter:creator" content="@paperswithcode">
<meta name="twitter:url" content="https://paperswithcode.com/search?q=author%3AYizeng+Han">
<meta name="twitter:domain" content="paperswithcode.com">
<!-- JSON LD -->
<script type="application/ld+json">{ "@context": "http://schema.org", "@graph": { "@type": "WebPage", "name": "Yizeng Han", "description": "Papers by Yizeng Han with links to code and results.", "url": "https://paperswithcode.com/search?q=author%3AYizeng+Han", "image": "https://paperswithcode.com/static/index.jpeg", "headline": "Yizeng Han" } }</script>
<meta name="theme-color" content="#fff"/>
<link rel="manifest" href="https://production-assets.paperswithcode.com/static/manifest.web.json">
</head>
<body>
<!-- Primary navigation. The brand and Twitter links contain only an SVG icon, so aria-label supplies their accessible name. -->
<nav class="navbar navbar-expand-lg navbar-light header">
<a class="navbar-brand" href="/" aria-label="Papers With Code home"> <span class=" icon-wrapper" data-name="pwc"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path d="M88 128h48v256H88zm144 0h48v256h-48zm-72 16h48v224h-48zm144 0h48v224h-48zm72-16h48v256h-48z"/><path d="M104 104V56H16v400h88v-48H64V104zm304-48v48h40v304h-40v48h88V56z"/></svg></span> </a>
<div class="navbar-mobile-twitter d-lg-none">
<a rel="noreferrer" href="https://twitter.com/paperswithcode" aria-label="Papers With Code on Twitter"> <span class=" icon-wrapper icon-fa icon-fa-brands" 
data-name="twitter"><svg viewBox="0 0 512.001 515.25" xmlns="http://www.w3.org/2000/svg"><path d="M459.37 152.016c.326 4.548.326 9.097.326 13.645 0 138.72-105.583 298.558-298.559 298.558C101.685 464.22 46.457 447 0 417.114c8.447.973 16.568 1.298 25.34 1.298 49.054 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.113-72.772 6.499.975 12.996 1.624 19.819 1.624 9.42 0 18.843-1.3 27.613-3.573-48.08-9.747-84.142-51.98-84.142-102.984v-1.3c13.968 7.798 30.213 12.67 47.43 13.32-28.263-18.843-46.78-51.006-46.78-87.391 0-19.492 5.196-37.36 14.294-52.954 51.654 63.674 129.3 105.258 216.364 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.827 46.782-104.934 104.934-104.934 30.214 0 57.502 12.67 76.671 33.136 23.715-4.548 46.455-13.319 66.599-25.34-7.798 24.367-24.366 44.834-46.132 57.828 21.117-2.274 41.584-8.122 60.426-16.244-14.292 20.791-32.161 39.309-52.628 54.253z"/></svg></span> </a> </div>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-bs-toggle="collapse" data-target="#top-menu" data-bs-target="#top-menu" aria-controls="top-menu" aria-expanded="false" aria-label="Toggle navigation" > <span class="navbar-toggler-icon"></span> </button>
<div class="collapse navbar-collapse" id="top-menu">
<ul class="navbar-nav mr-auto navbar-nav__left light-header">
<li class="nav-item header-search">
<!-- Global search. The query field has no visible label and the submit button is icon-only, so both carry aria-label. -->
<form action="/search" method="get" id="id_global_search_form" autocomplete="off">
<input type="text" name="q_meta" style="display:none" id="q_meta" />
<input type="hidden" name="q_type" id="q_type" />
<input id="id_global_search_input" autocomplete="off" value="" name='q' class="global-search" type="search" placeholder='Search' aria-label="Search"/>
<button type="submit" class="icon" aria-label="Search"><span class=" icon-wrapper icon-fa icon-fa-light" data-name="search"><svg viewBox="0 0 512.025 520.146" xmlns="http://www.w3.org/2000/svg"><path d="M508.5 482.6c4.7 4.7 4.7 12.3 0 17l-9.9 9.9c-4.7 4.7-12.3 4.7-17 0l-129-129c-2.2-2.3-3.5-5.3-3.5-8.5v-10.2C312 396 262.5 417 208 417 93.1 417 0 
323.9 0 209S93.1 1 208 1s208 93.1 208 208c0 54.5-21 104-55.3 141.1H371c3.2 0 6.2 1.2 8.5 3.5zM208 385c97.3 0 176-78.7 176-176S305.3 33 208 33 32 111.7 32 209s78.7 176 176 176z"/></svg></span></button> </form> </li>
<li class="nav-item"> <a class="nav-link" href="/sota"> Browse State-of-the-Art </a> </li>
<li class="nav-item"> <a class="nav-link" href="/datasets"> Datasets </a> </li>
<li class="nav-item"> <a class="nav-link" href="/methods">Methods</a> </li>
<li class="nav-item dropdown"> <a class="nav-link dropdown-toggle" role="button" id="navbarDropdownRepro" data-toggle="dropdown" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false" > More </a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownRepro">
<a class="dropdown-item" href="/newsletter">Newsletter</a>
<a class="dropdown-item" href="/rc2022">RC2022</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="/about">About</a>
<a class="dropdown-item" href="/trends">Trends</a>
<a class="dropdown-item" href="https://portal.paperswithcode.com/"> Portals </a>
<a class="dropdown-item" href="/libraries"> Libraries </a>
</div> </li> </ul>
<ul class="navbar-nav ml-auto navbar-nav__right navbar-subscribe justify-content-center align-items-center">
<!-- Icon-only link: aria-label supplies its accessible name. -->
<li class="nav-item"> <a class="nav-link" rel="noreferrer" href="https://twitter.com/paperswithcode" aria-label="Papers With Code on Twitter"> <span class="nav-link-social-icon icon-wrapper icon-fa icon-fa-brands" data-name="twitter"><svg viewBox="0 0 512.001 515.25" xmlns="http://www.w3.org/2000/svg"><path d="M459.37 152.016c.326 4.548.326 9.097.326 13.645 0 138.72-105.583 298.558-298.559 298.558C101.685 464.22 46.457 447 0 417.114c8.447.973 16.568 1.298 25.34 1.298 49.054 0 94.213-16.568 130.274-44.832-46.132-.975-84.792-31.188-98.113-72.772 6.499.975 12.996 1.624 19.819 1.624 9.42 0 18.843-1.3 27.613-3.573-48.08-9.747-84.142-51.98-84.142-102.984v-1.3c13.968 7.798 30.213 12.67 47.43 13.32-28.263-18.843-46.78-51.006-46.78-87.391 0-19.492 5.196-37.36 14.294-52.954 51.654 
63.674 129.3 105.258 216.364 109.807-1.624-7.797-2.599-15.918-2.599-24.04 0-57.827 46.782-104.934 104.934-104.934 30.214 0 57.502 12.67 76.671 33.136 23.715-4.548 46.455-13.319 66.599-25.34-7.798 24.367-24.366 44.834-46.132 57.828 21.117-2.274 41.584-8.122 60.426-16.244-14.292 20.791-32.161 39.309-52.628 54.253z"/></svg></span> </a> </li>
<li class="nav-item"> <a id="signin-link" class="nav-link" href="/accounts/login?next=/search">Sign In</a> </li>
</ul> </div> </nav>
<!-- Page modals -->
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog" aria-labelledby="emailModalLabel" aria-hidden="true">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header"> <h3 class="modal-title" id="emailModalLabel">Subscribe to the PwC Newsletter</h3> <button type="button" class="close" data-dismiss="modal" data-bs-dismiss="modal" aria-label="Close"> <span aria-hidden="true">×</span> </button> </div>
<form action="" method="post">
<div class="modal-body">
<div class="modal-body-info-text"> Stay informed on the latest trending ML papers with code, research developments, libraries, methods, and datasets.<br/><br/> <a href="/newsletter">Read previous issues</a> </div>
<input type="hidden" name="csrfmiddlewaretoken" value="EasKwbFlIOI6byL50mbN2tH22oaHIdeUINlMRHojs4ZT4CKoD6v5BkM0XEqTJIw5">
<!-- maxlength (no underscore) is the attribute browsers enforce; aria-label names the field because it has no visible label. -->
<input placeholder="Enter your email" type="email" class="form-control pwc-email" name="address" id="id_address" maxlength="100" aria-label="Email address" required>
</div>
<div class="modal-footer"> <button type="submit" class="btn btn-primary">Subscribe</button> </div>
</form>
</div> </div> </div>
<!-- Login -->
<div class="modal fade" id="loginModal" tabindex="-1" role="dialog" aria-labelledby="loginModalLabel" aria-hidden="true">
<div class="modal-dialog" role="document">
<div class="modal-content">
<div class="modal-header"> <h5 class="modal-title" id="loginModalLabel">Join the community</h5> <button type="button" class="close btn-close" data-dismiss="modal" 
data-bs-dismiss="modal" aria-label="Close"> <span aria-hidden="true">×</span> </button> </div>
<div class="login-modal-message"> You need to <a href="/accounts/login?next=/search">log in</a> to edit.<br/> You can <a href="/accounts/register?next=/search">create a new account</a> if you don't have one.<br/><br/> </div>
</div> </div> </div>
<div class="container content content-buffer ">
<div class="author-search-page">
<div class="title home-page-header">
<div class="row">
<div class="col-lg-6"> <h2 class="home-page-title"> Search Results for author: <span class="author-name">Yizeng Han</span> </h2> <h3 class="home-page-subtitle">Found <b>29</b> papers, <b>23</b> papers with code</h3> </div>
<div class="col-lg-6 index-group">
<!-- Sort controls. The toggle carries both data-toggle and data-bs-toggle, like every other toggle on the page; the icon-only direction link is named via aria-label. -->
<div id="authorSortBy" style="float: right;" class="btn-group pull-right search-page-order-by" role="group">
<div class="btn-group dropdown" role="group">
<button id="btnGroupDrop1" type="button" class="btn btn-outline-secondary dropdown-toggle" data-toggle="dropdown" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> Date Published <span class=" icon-wrapper icon-fa icon-fa-regular" data-name="chevron-down"><svg viewBox="0 0 448 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M441.9 168.28c4.7 4.7 4.7 12.3 0 17l-209.4 209.4c-4.7 4.7-12.3 4.7-17 0L6.1 185.28c-4.7-4.7-4.7-12.3 0-17l19.8-19.8c4.7-4.7 12.3-4.7 17 0L224 329.18l181.1-180.7c4.701-4.7 12.302-4.7 17 0z"/></svg></span> </button>
<div class="dropdown-menu" aria-labelledby="btnGroupDrop1"> <a class="dropdown-item" href="?q=author%3AYizeng+Han&order_by=date">Date Published</a> <a class="dropdown-item" href="?q=author%3AYizeng+Han&order_by=stars">Github Stars</a> </div>
</div>
<a href="?q=author%3AYizeng+Han&order=asc" type="button" class="btn btn-outline-secondary" aria-label="Sort in ascending order"> <span class=" icon-wrapper icon-fa icon-fa-regular" data-name="arrow-down"><svg viewBox="0 0 448 513.795" xmlns="http://www.w3.org/2000/svg"><path d="M441.9 251.08c4.7 4.7 4.7 12.3 0 17l-209.4 209.4c-4.7 4.7-12.3 4.7-17 0L6.1 
268.08c-4.7-4.7-4.7-12.3 0-17l19.8-19.8c4.7-4.7 12.3-4.7 17 0L198 386.38V44.98c0-6.599 5.401-12 12-12h28c6.6 0 12 5.401 12 12v341.4l155.1-155.1c4.701-4.7 12.302-4.7 17 0z"/></svg></span> </a> </div> </div> </div> </div> <div class="infinite-container text-center"> <div class="row infinite-item item paper-card"> <!-- 2458538 --> <div class="col-lg-3 item-image-col"> <a href="/paper/enat-rethinking-spatial-temporal-interactions"> <div class="item-image" style="background-image: url('');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/enat-rethinking-spatial-temporal-interactions">ENAT: Rethinking Spatial-temporal Interactions in Token-based Image Synthesis</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/enat-rethinking-spatial-temporal-interactions#code">1 code implementation</a> • <span class="author-name-text item-date-pub">11 Nov 2024</span> • <span class="author-span "> <a href="/author/zanlin-ni">Zanlin Ni</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/renping-zhou">Renping Zhou</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/jiayi-guo">Jiayi Guo</a></span>, <span class="author-span "> <a href="/author/zhiyuan-liu">Zhiyuan Liu</a></span>, <span class="author-span "> <a href="/author/yuan-yao-1">Yuan YAO</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">At the spatial level, we disentangle the computations of visible and mask tokens by encoding visible tokens independently, while decoding mask tokens conditioned on the fully encoded visible tokens.</p> <div class="sota"> </div> <p> <a href="/task/image-generation"> <span class="badge badge-primary"> <img 
src="https://production-media.paperswithcode.com/thumbnails/task/5ac09bd9-8785-4253-8cf4-4412dcd36426.jpg" alt=""> <span>Image Generation</span> </span> </a> </p>
<!-- The task thumbnail above has an empty alt: it is decorative, and the task name follows it as text. -->
</div>
<div class="col-lg-3 item-interact text-center">
<div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 9</span> </div>
<div class="entity" style="margin-bottom: 20px;">
<a href="/paper/enat-rethinking-spatial-temporal-interactions" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/>
<a href="/paper/enat-rethinking-spatial-temporal-interactions#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 
208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2454167 --> <div class="col-lg-3 item-image-col"> <a href="/paper/deer-vla-dynamic-inference-of-multimodal"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/e29b6686-72c5-4c71-98d8-ad24db8c76f5.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/deer-vla-dynamic-inference-of-multimodal">DeeR-VLA: Dynamic Inference of Multimodal Large Language Models for Efficient Robot Execution</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/deer-vla-dynamic-inference-of-multimodal#code">1 code implementation</a> • <span class="author-name-text item-date-pub">4 Nov 2024</span> • <span class="author-span "> <a href="/author/yang-yue">Yang Yue</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/bingyi-kang">Bingyi Kang</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/shenzhi-wang">Shenzhi Wang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/jiashi-feng">Jiashi Feng</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">MLLMs have demonstrated remarkable comprehension and reasoning capabilities with complex language and visual data.</p> <div 
class="sota"> </div>
<!-- The task thumbnail has an empty alt: it is decorative, and the task name follows it as text. -->
<p> <a href="/task/robot-manipulation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Robot Manipulation</span> </span> </a> </p>
</div>
<div class="col-lg-3 item-interact text-center">
<div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 25</span> </div>
<div class="entity" style="margin-bottom: 20px;">
<a href="/paper/deer-vla-dynamic-inference-of-multimodal" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/>
<a href="/paper/deer-vla-dynamic-inference-of-multimodal#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 
2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- None --> <div class="col-lg-3 item-image-col"> <a href="/paper/exploring-contextual-modeling-with-linear"> <div class="item-image" style="background-image: url('');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/exploring-contextual-modeling-with-linear">Exploring contextual modeling with linear complexity for point cloud segmentation</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/exploring-contextual-modeling-with-linear#code">no code implementations</a> • <span class="author-name-text item-date-pub">28 Oct 2024</span> • <span class="author-span "> <a href="/author/yong-xien-chng">Yong Xien Chng</a></span>, <span class="author-span "> <a href="/author/xuchong-qiu">Xuchong Qiu</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span "> <a href="/author/jiewei-cao">Jiewei Cao</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Recently, Mamba has emerged as a promising alternative, offering efficient long-range contextual modeling capabilities without the quadratic complexity associated with Transformer's attention mechanisms.</p> <div class="sota"> </div> <p> <a href="/task/mamba"> <span class="badge badge-primary"> <img 
src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Mamba</span> </span> </a> <a href="/task/point-cloud-segmentation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Point Cloud Segmentation</span> </span> </a> </p>
<!-- The task thumbnails above have an empty alt: they are decorative, and each task name follows as text. -->
</div>
<div class="col-lg-3 item-interact text-center">
<div class="entity-stars"> <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span> </div>
<div class="entity" style="margin-bottom: 20px;">
<a href="/paper/exploring-contextual-modeling-with-linear" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/>
<a href="/paper/exploring-contextual-modeling-with-linear#code" class="badge badge-dark badge-nocode "> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Add Code </a> <br/>
</div> </div> </div> </div> </div>
<div class="row infinite-item item paper-card"> <!-- 2431722 -->
<div class="col-lg-3 item-image-col"> <a href="/paper/dynamic-diffusion-transformer"> <div class="item-image" style="background-image: url('');"> </div> </a> </div>
<div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content">
<h1><a href="/paper/dynamic-diffusion-transformer">Dynamic Diffusion Transformer</a></h1>
<p class="author-section" style="padding-top:2px"> <a 
href="/paper/dynamic-diffusion-transformer#code">1 code implementation</a> • <span class="author-name-text item-date-pub">4 Oct 2024</span> • <span class="author-span "> <a href="/author/wangbo-zhao">Wangbo Zhao</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/jiasheng-tang">Jiasheng Tang</a></span>, <span class="author-span "> <a href="/author/kai-wang">Kai Wang</a></span>, <span class="author-span "> <a href="/author/yibing-song">Yibing Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span>, <span class="author-span "> <a href="/author/fan-wang">Fan Wang</a></span>, <span class="author-span "> <a href="/author/yang-you">Yang You</a></span> </p>
<p class="item-strip-abstract">In addition, we design a Spatial-wise Dynamic Token (SDT) strategy to avoid redundant computation at unnecessary spatial locations.</p>
<div class="sota"> </div>
<!-- The task thumbnail has an empty alt: it is decorative, and the task name follows it as text. -->
<p> <a href="/task/image-generation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/5ac09bd9-8785-4253-8cf4-4412dcd36426.jpg" alt=""> <span>Image Generation</span> </span> </a> </p>
</div>
<div class="col-lg-3 item-interact text-center">
<div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 35</span> </div>
<div class="entity" style="margin-bottom: 20px;">
<a href="/paper/dynamic-diffusion-transformer" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" 
height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/dynamic-diffusion-transformer#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- None --> <div class="col-lg-3 item-image-col"> <a href="/paper/semantic-refocused-tuning-for-open-vocabulary"> <div class="item-image" style="background-image: url('');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/semantic-refocused-tuning-for-open-vocabulary">Semantic Refocused Tuning for Open-Vocabulary Panoptic Segmentation</a></h1> <p class="author-section" style="padding-top:2px"> <a 
href="/paper/semantic-refocused-tuning-for-open-vocabulary#code">no code implementations</a> •
          <span class="author-name-text item-date-pub">24 Sep 2024</span> •
          <span class="author-span"> <a href="/author/yong-xien-chng">Yong Xien Chng</a></span>,
          <span class="author-span"> <a href="/author/xuchong-qiu">Xuchong Qiu</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/kai-ding">Kai Ding</a></span>,
          <span class="author-span"> <a href="/author/wan-ding">Wan Ding</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>
        </p>
        <p class="item-strip-abstract">This adjustment allows the model to adapt the image focus of mask tokens to new distributions with minimal training resources, while preserving the VLM's pre-trained knowledge.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/open-vocabulary-panoptic-segmentation">
            <span class="badge badge-primary">
              <!-- Decorative thumbnail: the adjacent text already names the task, so alt is empty. -->
              <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
              <span>Open Vocabulary Panoptic Segmentation</span>
            </span>
          </a>
          <a href="/task/panoptic-segmentation">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000000895-b36c6778.jpg" alt="">
              <span>Panoptic Segmentation</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/semantic-refocused-tuning-for-open-vocabulary" class="badge badge-light">
            <!-- Icon only decorates the visible link text, so it is hidden from assistive technology. -->
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/semantic-refocused-tuning-for-open-vocabulary#code" class="badge badge-dark badge-nocode">
            <span class="icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span>
            Add Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "OStr-DARTS: Differentiable Neural Architecture Search based on Operation Strength" (thumbnail column, then title, authors, abstract and task badges, then star count and Paper/Code links). -->
<div class="row infinite-item item paper-card">
  <!-- 2425852 -->
  <div class="col-lg-3 item-image-col">
    <!-- The thumbnail link has no text content, so it is named after the paper title. -->
    <a href="/paper/ostr-darts-differentiable-neural-architecture" aria-label="OStr-DARTS: Differentiable Neural Architecture Search based on Operation Strength">
      <!-- No thumbnail for this paper: "none" replaces the empty url(''). -->
      <div class="item-image" style="background-image: none;"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <!-- NOTE(review): every card title is an <h1>; a lower heading level would suit a list of cards, but confirm which selectors the stylesheet uses before changing it. -->
        <h1><a href="/paper/ostr-darts-differentiable-neural-architecture">OStr-DARTS: Differentiable Neural Architecture Search based on Operation Strength</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/ostr-darts-differentiable-neural-architecture#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">22 Sep 2024</span> •
          <span class="author-span"> <a href="/author/le-yang">Le Yang</a></span>,
          <span class="author-span"> <a href="/author/ziwei-zheng">Ziwei Zheng</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/shiji-song">Shiji Song</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>,
          <span class="author-span"> <a href="/author/fan-li">Fan Li</a></span>
        </p>
        <p class="item-strip-abstract">Differentiable architecture search (DARTS) has emerged as a promising technique for effective neural architecture search, and it mainly contains two steps to find the high-performance architecture: First, the DARTS supernet that consists of mixed operations will be optimized via gradient descent.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/attribute">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
              <span>Attribute</span>
            </span>
          </a>
          <a href="/task/architecture-search">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/ab71369b-c50a-4095-99ae-4df731cda975.jpg" alt="">
              <span>Neural Architecture Search</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <!-- The star icon is the only cue for what the number means, so it carries a text label. -->
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 3</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/ostr-darts-differentiable-neural-architecture" class="badge badge-light">
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/ostr-darts-differentiable-neural-architecture#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "Efficient Diffusion Transformer with Step-wise Dynamic Attention Mediators". -->
<div class="row infinite-item item paper-card">
  <!-- 2400920 -->
  <div class="col-lg-3 item-image-col">
    <a href="/paper/efficient-diffusion-transformer-with-step" aria-label="Efficient Diffusion Transformer with Step-wise Dynamic Attention Mediators">
      <div class="item-image" style="background-image: none;"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <h1><a href="/paper/efficient-diffusion-transformer-with-step">Efficient Diffusion Transformer with Step-wise Dynamic Attention Mediators</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/efficient-diffusion-transformer-with-step#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">11 Aug 2024</span> •
          <span class="author-span"> <a href="/author/yifan-pu">Yifan Pu</a></span>,
          <span class="author-span"> <a href="/author/zhuofan-xia">Zhuofan Xia</a></span>,
          <span class="author-span"> <a href="/author/jiayi-guo">Jiayi Guo</a></span>,
          <span class="author-span"> <a href="/author/dongchen-han">Dongchen Han</a></span>,
          <span class="author-span"> <a href="/author/qixiu-li">Qixiu Li</a></span>,
          <span class="author-span"> <a href="/author/duo-li">Duo Li</a></span>,
          <span class="author-span"> <a href="/author/yuhui-yuan">Yuhui Yuan</a></span>,
          <span
class="author-span"> <a href="/author/ji-li">Ji Li</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/shiji-song">Shiji Song</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>,
          <span class="author-span"> <a href="/author/xiu-li">Xiu Li</a></span>
        </p>
        <p class="item-strip-abstract">In response to this observation, we present a novel diffusion transformer framework incorporating an additional set of mediator tokens to engage with queries and keys separately.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/denoising">
            <span class="badge badge-primary">
              <!-- Decorative thumbnail: the adjacent text already names the task, so alt is empty. -->
              <img src="https://production-media.paperswithcode.com/thumbnails/task/6c4d53f8-9c6d-47c8-80c7-1b8e1c0a7d42.jpg" alt="">
              <span>Denoising</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <!-- The star icon is the only cue for what the number means, so it carries a text label. -->
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 34</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/efficient-diffusion-transformer-with-step" class="badge badge-light">
            <!-- Icon only decorates the visible link text, so it is hidden from assistive technology. -->
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/efficient-diffusion-transformer-with-step#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "UniTTA: Unified Benchmark and Versatile Framework Towards Realistic Test-Time Adaptation" (thumbnail column, then title, authors, abstract and task badges, then star count and Paper/Code links). -->
<div class="row infinite-item item paper-card">
  <!-- 2392759 -->
  <div class="col-lg-3 item-image-col">
    <!-- The thumbnail link has no text content, so it is named after the paper title. -->
    <a href="/paper/unitta-unified-benchmark-and-versatile" aria-label="UniTTA: Unified Benchmark and Versatile Framework Towards Realistic Test-Time Adaptation">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/4175886b-b6a5-496e-8387-a831c65810c8.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <!-- NOTE(review): every card title is an <h1>; a lower heading level would suit a list of cards, but confirm which selectors the stylesheet uses before changing it. -->
        <h1><a href="/paper/unitta-unified-benchmark-and-versatile">UniTTA: Unified Benchmark and Versatile Framework Towards Realistic Test-Time Adaptation</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/unitta-unified-benchmark-and-versatile#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">29 Jul 2024</span> •
          <span class="author-span"> <a href="/author/chaoqun-du">Chaoqun Du</a></span>,
          <span class="author-span"> <a href="/author/yulin-wang">Yulin Wang</a></span>,
          <span class="author-span"> <a href="/author/jiayi-guo">Jiayi Guo</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/jie-zhou-1">Jie zhou</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>
        </p>
        <p class="item-strip-abstract">To this end, we propose a Unified Test-Time Adaptation (UniTTA) benchmark, which is comprehensive and widely applicable.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/test-time-adaptation">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
              <span>Test-time Adaptation</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 16</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/unitta-unified-benchmark-and-versatile" class="badge badge-light">
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/unitta-unified-benchmark-and-versatile#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "DyFADet: Dynamic Feature Aggregation for Temporal Action Detection". -->
<div class="row infinite-item item paper-card">
  <!-- 2372289 -->
  <div class="col-lg-3 item-image-col">
    <a href="/paper/dyfadet-dynamic-feature-aggregation-for" aria-label="DyFADet: Dynamic Feature Aggregation for Temporal Action Detection">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/73056a6e-b958-4b2c-81d4-ea684e978dad.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <h1><a href="/paper/dyfadet-dynamic-feature-aggregation-for">DyFADet: Dynamic Feature Aggregation for Temporal Action Detection</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/dyfadet-dynamic-feature-aggregation-for#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">3 Jul 2024</span> •
          <span class="author-span"> <a href="/author/le-yang">Le Yang</a></span>,
          <span class="author-span"> <a href="/author/ziwei-zheng">Ziwei Zheng</a></span>,
          <span class="author-span author-matched"> <a
href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/hao-cheng">Hao Cheng</a></span>,
          <span class="author-span"> <a href="/author/shiji-song">Shiji Song</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>,
          <span class="author-span"> <a href="/author/fan-li">Fan Li</a></span>
        </p>
        <p class="item-strip-abstract">Based on DFA, the proposed dynamic encoder layer aggregates the temporal features within the action time ranges and guarantees the discriminability of the extracted representations.</p>
        <div class="sota">
          <p>
            <a href="/sota/temporal-action-localization-on-hacs">
              <!-- The image is this link's only content, so its alt text names the link destination. -->
              <img style="height:20px;width:35px;position:relative;top:1px;" src="https://production-media.paperswithcode.com/sota-thumbs/temporal-action-localization-on-hacs-small_410b58ec.png" alt="Temporal Action Localization on HACS">
            </a>
            Ranked #3 on
            <a href="/sota/temporal-action-localization-on-hacs"> Temporal Action Localization on HACS </a>
          </p>
        </div>
        <p>
          <a href="/task/action-detection">
            <span class="badge badge-primary">
              <!-- Decorative thumbnail: the adjacent text already names the task, so alt is empty. -->
              <img src="https://production-media.paperswithcode.com/thumbnails/task/fe4dde36-d569-498d-b386-af61cb831541.jpg" alt="">
              <span>Action Detection</span>
            </span>
          </a>
          <a href="/task/action-recognition">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/21b7189c-5d64-46b0-aa33-3da648560eaa.jpg" alt="">
              <span>Temporal Action Localization</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <!-- The star icon is the only cue for what the number means, so it carries a text label. -->
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 13</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/dyfadet-dynamic-feature-aggregation-for" class="badge badge-light">
            <!-- Icon only decorates the visible link text, so it is hidden from assistive technology. -->
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/dyfadet-dynamic-feature-aggregation-for#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "Demystify Mamba in Vision: A Linear Attention Perspective" (thumbnail column, then title, authors, abstract and task badges, then star count and Paper/Code links). -->
<div class="row infinite-item item paper-card">
  <!-- 2345391 -->
  <div class="col-lg-3 item-image-col">
    <!-- The thumbnail link has no text content, so it is named after the paper title. -->
    <a href="/paper/demystify-mamba-in-vision-a-linear-attention" aria-label="Demystify Mamba in Vision: A Linear Attention Perspective">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/fe2cb99e-9cae-4149-939a-c28255840e05.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <!-- NOTE(review): every card title is an <h1>; a lower heading level would suit a list of cards, but confirm which selectors the stylesheet uses before changing it. -->
        <h1><a href="/paper/demystify-mamba-in-vision-a-linear-attention">Demystify Mamba in Vision: A Linear Attention Perspective</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/demystify-mamba-in-vision-a-linear-attention#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">26 May 2024</span> •
          <span class="author-span"> <a href="/author/dongchen-han">Dongchen Han</a></span>,
          <span class="author-span"> <a href="/author/ziyi-wang">Ziyi Wang</a></span>,
          <span class="author-span"> <a href="/author/zhuofan-xia">Zhuofan Xia</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/yifan-pu">Yifan Pu</a></span>,
          <span class="author-span"> <a href="/author/chunjiang-ge">Chunjiang Ge</a></span>,
          <span class="author-span"> <a href="/author/jun-song">Jun Song</a></span>,
          <span class="author-span"> <a href="/author/shiji-song">Shiji Song</a></span>,
          <span class="author-span"> <a href="/author/bo-zheng">Bo Zheng</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>
        </p>
        <p class="item-strip-abstract">By exploring the similarities and disparities between the effective Mamba and subpar linear attention Transformer, we provide comprehensive analyses to demystify the key factors behind Mamba's success.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/image-classification">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/7a146e71-bbf8-4137-bf25-a3618bd043a0.jpg" alt="">
              <span>Image Classification</span>
            </span>
          </a>
          <a href="/task/mamba">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
              <span>Mamba</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 217</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/demystify-mamba-in-vision-a-linear-attention" class="badge badge-light">
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/demystify-mamba-in-vision-a-linear-attention#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "EfficientTrain++: Generalized Curriculum Learning for Efficient Visual Backbone Training". -->
<div class="row infinite-item item paper-card">
  <!-- 2337852 -->
  <div class="col-lg-3 item-image-col">
    <a href="/paper/efficienttrain-generalized-curriculum" aria-label="EfficientTrain++: Generalized Curriculum Learning for Efficient Visual Backbone Training">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/c6e80d2f-f22b-4d8d-8944-395d43ddce24.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <h1><a href="/paper/efficienttrain-generalized-curriculum">EfficientTrain++: Generalized Curriculum Learning for Efficient Visual Backbone Training</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/efficienttrain-generalized-curriculum#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">14 May 2024</span> •
          <span class="author-span"> <a href="/author/yulin-wang">Yulin Wang</a></span>,
          <span class="author-span"> <a href="/author/yang-yue">Yang Yue</a></span>,
          <span class="author-span"> <a href="/author/rui-lu">Rui Lu</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/shiji-song">Shiji Song</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>
        </p>
        <p class="item-strip-abstract">These patterns, when observed through frequency and spatial domains, incorporate lower-frequency components, and the natural image contents without distortion or data augmentation.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/data-augmentation">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000001560-029cbc00.jpg" alt="">
              <span>Data Augmentation</span>
            </span>
          </a>
          <a href="/task/self-supervised-learning">
            <span class="badge badge-primary">
              <img alt=""
src="https://production-media.paperswithcode.com/thumbnails/task/task-0000001882-b4b42454.jpg">
              <span>Self-Supervised Learning</span>
            </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <!-- The star icon is the only cue for what the number means, so it carries a text label. -->
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 212</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/efficienttrain-generalized-curriculum" class="badge badge-light">
            <!-- Icon only decorates the visible link text, so it is hidden from assistive technology. -->
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/efficienttrain-generalized-curriculum#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation" (thumbnail column, then title, authors, abstract and task badges, then star count and Paper/Code links). -->
<div class="row infinite-item item paper-card">
  <!-- 2302823 -->
  <div class="col-lg-3 item-image-col">
    <!-- The thumbnail link has no text content, so it is named after the paper title. -->
    <a href="/paper/dynamic-tuning-towards-parameter-and" aria-label="Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2403.11808.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <!-- NOTE(review): every card title is an <h1>; a lower heading level would suit a list of cards, but confirm which selectors the stylesheet uses before changing it. -->
        <h1><a href="/paper/dynamic-tuning-towards-parameter-and">Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/dynamic-tuning-towards-parameter-and#code">1 code implementation</a> •
          <span class="author-name-text item-date-pub">18 Mar 2024</span> •
          <span class="author-span"> <a href="/author/wangbo-zhao">Wangbo Zhao</a></span>,
          <span class="author-span"> <a href="/author/jiasheng-tang">Jiasheng Tang</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/yibing-song">Yibing Song</a></span>,
          <span class="author-span"> <a href="/author/kai-wang">Kai Wang</a></span>,
          <span class="author-span"> <a href="/author/gao-huang">Gao Huang</a></span>,
          <span class="author-span"> <a href="/author/fan-wang">Fan Wang</a></span>,
          <span class="author-span"> <a href="/author/yang-you">Yang You</a></span>
        </p>
        <p class="item-strip-abstract">Existing parameter-efficient fine-tuning (PEFT) methods have achieved significant success on vision transformers (ViTs) adaptation by improving parameter efficiency.</p>
        <div class="sota"> </div>
        <p>
          <a href="/task/parameter-efficient-fine-tuning">
            <span class="badge badge-primary">
              <!-- Decorative thumbnail: the adjacent text already names the task, so alt is empty. -->
              <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt="">
              <span>parameter-efficient fine-tuning</span>
            </span>
          </a>
          <a href="/task/semantic-segmentation">
            <span class="badge badge-primary">
              <img src="https://production-media.paperswithcode.com/thumbnails/task/0d834282-fd21-4e57-be69-d5c2ed538690.jpg" alt="">
              <span>Semantic Segmentation</span>
            </span>
          </a>
          <!-- "+1" alone is not meaningful out of context; the label keeps the visible text and adds what it counts. -->
          <a style="position: relative; top: -2px;" href="/paper/dynamic-tuning-towards-parameter-and#tasks" aria-label="+1 more task">
            <span class="badge badge-primary"> <b>+1</b> </span>
          </a>
        </p>
      </div>
      <div class="col-lg-3 item-interact text-center">
        <div class="entity-stars">
          <span class="badge badge-secondary"><span class="icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" role="img" aria-label="Stars"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 36</span>
        </div>
        <div class="entity" style="margin-bottom: 20px;">
          <a href="/paper/dynamic-tuning-towards-parameter-and" class="badge badge-light">
            <span class="icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span>
            Paper
          </a>
          <br>
          <a href="/paper/dynamic-tuning-towards-parameter-and#code" class="badge badge-dark">
            <span class="icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512" aria-hidden="true"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span>
            Code
          </a>
          <br>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Paper card: "GRA: Detecting Oriented Objects through Group-wise Rotating and Attention". -->
<div class="row infinite-item item paper-card">
  <!-- None -->
  <div class="col-lg-3 item-image-col">
    <a href="/paper/gra-detecting-oriented-objects-through-group" aria-label="GRA: Detecting Oriented Objects through Group-wise Rotating and Attention">
      <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2403.11127.jpg');"> </div>
    </a>
  </div>
  <div class="col-lg-9 item-col">
    <div class="row">
      <div class="col-lg-9 item-content">
        <h1><a href="/paper/gra-detecting-oriented-objects-through-group">GRA: Detecting Oriented Objects through Group-wise Rotating and Attention</a></h1>
        <p class="author-section" style="padding-top:2px">
          <a href="/paper/gra-detecting-oriented-objects-through-group#code">no code implementations</a> •
          <span class="author-name-text item-date-pub">17 Mar 2024</span> •
          <span class="author-span"> <a href="/author/jiangshan-wang">Jiangshan Wang</a></span>,
          <span class="author-span"> <a href="/author/yifan-pu">Yifan Pu</a></span>,
          <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>,
          <span class="author-span"> <a href="/author/jiayi-guo">Jiayi Guo</a></span>,
          <span class="author-span"> <a href="/author/yiru-wang">Yiru Wang</a></span>,
          <span
class="author-span "> <a href="/author/xiu-li">Xiu Li</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">GRA can adaptively capture fine-grained features of objects with diverse orientations, comprising two key components: Group-wise Rotating and Group-wise Attention.</p> <div class="sota"> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/object"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Object</span> </span> </a> <a href="/task/object-detection-1"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>object-detection</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/gra-detecting-oriented-objects-through-group#tasks"> <span class="badge badge-primary"> <b>+2</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/gra-detecting-oriented-objects-through-group" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/gra-detecting-oriented-objects-through-group#code" class="badge badge-dark badge-nocode "> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" 
d="M256 112v288m144-144H112"/></svg></span> Add Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2287132 --> <div class="col-lg-3 item-image-col"> <a href="/paper/simpro-a-simple-probabilistic-framework"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/927c3d17-17ae-4b9b-8593-47a2c80ab313.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/simpro-a-simple-probabilistic-framework">SimPro: A Simple Probabilistic Framework Towards Realistic Long-Tailed Semi-Supervised Learning</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/simpro-a-simple-probabilistic-framework#code">1 code implementation</a> • <span class="author-name-text item-date-pub">21 Feb 2024</span> • <span class="author-span "> <a href="/author/chaoqun-du">Chaoqun Du</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Recent advancements in semi-supervised learning have focused on a more realistic yet challenging task: addressing imbalances in labeled data while the class distribution of unlabeled data remains both unknown and potentially mismatched.</p> <div class="sota"> </div> <p> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 25</span> 
</div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/simpro-a-simple-probabilistic-framework" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/simpro-a-simple-probabilistic-framework#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2408101 --> <div class="col-lg-3 item-image-col"> <a href="/paper/mask-grounding-for-referring-image"> <div class="item-image" style="background-image: 
url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/bb6f4b2b-8ba2-41e4-bd5e-19d04488ff7a.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/mask-grounding-for-referring-image">Mask Grounding for Referring Image Segmentation</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/mask-grounding-for-referring-image#code">1 code implementation</a> • <span class="item-conference-link"> <a href="/conference/cvpr-2024-1"> CVPR 2024 </a> </span> • <span class="author-span "> <a href="/author/yong-xien-chng">Yong Xien Chng</a></span>, <span class="author-span "> <a href="/author/henry-zheng">Henry Zheng</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/xuchong-qiu">Xuchong Qiu</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">To tackle this challenge, we introduce a novel Mask Grounding auxiliary task that significantly improves visual grounding within language features, by explicitly teaching the model to learn fine-grained correspondence between masked textual tokens and their matching visual objects.</p> <div class="sota"> <p> <!-- This thumbnail is the only content of its link, so alt supplies the link's name (same text as the link that follows). --><a href="/sota/referring-expression-segmentation-on-refcoco-9"> <img style="height:20px;width:35px;position:relative;top:1px;" src="https://production-media.paperswithcode.com/sota-thumbs/referring-expression-segmentation-on-refcoco-9-small_4564e0b2.png" alt="Referring Expression Segmentation on RefCOCO testB"/> </a> Ranked #3 on <a href="/sota/referring-expression-segmentation-on-refcoco-9"> Referring Expression Segmentation on RefCOCO testB </a> </p> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/cross-modal-alignment"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>cross-modal alignment</span> </span> </a> <a href="/task/image-segmentation"> <span 
class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Image Segmentation</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/mask-grounding-for-referring-image#tasks"> <span class="badge badge-primary"> <b>+5</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 19</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/mask-grounding-for-referring-image" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/mask-grounding-for-referring-image#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 
14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2253188 --> <div class="col-lg-3 item-image-col"> <a href="/paper/gsva-generalized-segmentation-via-multimodal"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/be899c5a-a1a2-4fd0-af7f-839629c82374.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/gsva-generalized-segmentation-via-multimodal">GSVA: Generalized Segmentation via Multimodal Large Language Models</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/gsva-generalized-segmentation-via-multimodal#code">1 code implementation</a> • <span class="item-conference-link"> <a href="/conference/cvpr-2024-1"> CVPR 2024 </a> </span> • <span class="author-span "> <a href="/author/zhuofan-xia">Zhuofan Xia</a></span>, <span class="author-span "> <a href="/author/dongchen-han">Dongchen Han</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/xuran-pan">Xuran Pan</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Generalized Referring Expression Segmentation (GRES) extends the scope of classic RES to refer to multiple objects in one 
expression or identify the empty targets absent in the image.</p> <div class="sota"> <p> <!-- This thumbnail is the only content of its link, so alt supplies the link's name (same text as the link that follows). --><a href="/sota/generalized-referring-expression-segmentation"> <img style="height:20px;width:35px;position:relative;top:1px;" src="https://production-media.paperswithcode.com/sota-thumbs/generalized-referring-expression-segmentation-small_6ff7cd48.png" alt="Generalized Referring Expression Segmentation on gRefCOCO"/> </a> Ranked #1 on <a class="sota-task" href="/sota/generalized-referring-expression-segmentation"> Generalized Referring Expression Segmentation on gRefCOCO </a> (using extra training data) </p> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/decoder"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Decoder</span> </span> </a> <a href="/task/generalized-referring-expression-segmentation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/dc3d6ec1-75bb-4251-b6db-fa821063804b.jpg" alt=""> <span>Generalized Referring Expression Segmentation</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/gsva-generalized-segmentation-via-multimodal#tasks"> <span class="badge badge-primary"> <b>+2</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 96</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/gsva-generalized-segmentation-via-multimodal" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path 
d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/gsva-generalized-segmentation-via-multimodal#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2251815 --> <div class="col-lg-3 item-image-col"> <a href="/paper/agent-attention-on-the-integration-of-softmax"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/9859e4e5-91df-4cb2-97e4-dece74af35dc.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/agent-attention-on-the-integration-of-softmax">Agent Attention: On the Integration of Softmax and Linear Attention</a></h1> <p 
class="author-section" style="padding-top:2px"> <a href="/paper/agent-attention-on-the-integration-of-softmax#code">2 code implementations</a> • <span class="author-name-text item-date-pub">14 Dec 2023</span> • <span class="author-span "> <a href="/author/dongchen-han">Dongchen Han</a></span>, <span class="author-span "> <a href="/author/tianzhu-ye">Tianzhu Ye</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/zhuofan-xia">Zhuofan Xia</a></span>, <span class="author-span "> <a href="/author/siyuan-pan">Siyuan Pan</a></span>, <span class="author-span "> <a href="/author/pengfei-wan">Pengfei Wan</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Specifically, the Agent Attention, denoted as a quadruple $(Q, A, K, V)$, introduces an additional set of agent tokens $A$ into the conventional attention module.</p> <div class="sota"> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/computational-efficiency"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Computational Efficiency</span> </span> </a> <a href="/task/image-classification"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/7a146e71-bbf8-4137-bf25-a3618bd043a0.jpg" alt=""> <span>Image Classification</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/agent-attention-on-the-integration-of-softmax#tasks"> <span class="badge badge-primary"> <b>+4</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 
1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 540</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/agent-attention-on-the-integration-of-softmax" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/agent-attention-on-the-integration-of-softmax#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2264167 --> <div 
class="col-lg-3 item-image-col"> <a href="/paper/fine-grained-recognition-with-learnable"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2309.00399.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/fine-grained-recognition-with-learnable">Fine-grained Recognition with Learnable Semantic Data Augmentation</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/fine-grained-recognition-with-learnable#code">1 code implementation</a> • <span class="author-name-text item-date-pub">1 Sep 2023</span> • <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/junlan-feng">Junlan Feng</a></span>, <span class="author-span "> <a href="/author/chao-deng">Chao Deng</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Since images belonging to the same meta-category usually share similar visual appearances, mining discriminative visual cues is the key to distinguishing fine-grained categories.</p> <div class="sota"> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/data-augmentation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000001560-029cbc00.jpg" alt=""> <span>Data Augmentation</span> </span> </a> <a href="/task/fine-grained-image-recognition"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Fine-Grained Image Recognition</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/fine-grained-recognition-with-learnable#tasks"> <span class="badge badge-primary"> <b>+2</b> </span> </a> 
</p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 27</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/fine-grained-recognition-with-learnable" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/fine-grained-recognition-with-learnable#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 
21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2187169 --> <div class="col-lg-3 item-image-col"> <a href="/paper/latency-aware-unified-dynamic-networks-for"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/ac51e4ad-49b5-4145-87bd-6b184156b3cc.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/latency-aware-unified-dynamic-networks-for">Latency-aware Unified Dynamic Networks for Efficient Image Recognition</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/latency-aware-unified-dynamic-networks-for#code">1 code implementation</a> • <span class="author-name-text item-date-pub">30 Aug 2023</span> • <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/zeyu-liu">Zeyu Liu</a></span>, <span class="author-span "> <a href="/author/zhihang-yuan">Zhihang Yuan</a></span>, <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span "> <a href="/author/chaofei-wang">Chaofei Wang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Dynamic computation has emerged as a promising avenue to enhance the inference efficiency of deep networks.</p> <div class="sota"> </div> <p> <!-- The task-badge thumbnail is decorative: the span beside it names the task, so alt is left empty. --><a href="/task/scheduling"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Scheduling</span> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div 
class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 42</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/latency-aware-unified-dynamic-networks-for" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/latency-aware-unified-dynamic-networks-for#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 
11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- None --> <div class="col-lg-3 item-image-col"> <a href="/paper/computation-efficient-deep-learning-for"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2308.13998.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/computation-efficient-deep-learning-for">Computation-efficient Deep Learning for Computer Vision: A Survey</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/computation-efficient-deep-learning-for#code">no code implementations</a> • <span class="author-name-text item-date-pub">27 Aug 2023</span> • <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/chaofei-wang">Chaofei Wang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/qi-tian">Qi Tian</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Over the past decade, deep learning models have exhibited considerable advancements, reaching or even exceeding human-level performance in a range of visual perception tasks.</p> <div class="sota"> </div> <p> <!-- Task-badge thumbnails are decorative: the span beside each one names the task, so alt is left empty. --><a href="/task/autonomous-vehicles"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/633884c2-f4f5-4234-b2dc-13e6281ffad4.jpg" alt=""> <span>Autonomous Vehicles</span> </span> </a> <a href="/task/deep-learning"> <span class="badge badge-primary"> <img 
src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Deep Learning</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/computation-efficient-deep-learning-for#tasks"> <span class="badge badge-primary"> <b>+3</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/computation-efficient-deep-learning-for" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/computation-efficient-deep-learning-for#code" class="badge badge-dark badge-nocode "> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Add Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2174375 --> <div class="col-lg-3 item-image-col"> <a href="/paper/flatten-transformer-vision-transformer-using"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/6e485122-07a5-4860-8e88-737bb22660fb.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/flatten-transformer-vision-transformer-using">FLatten Transformer: Vision 
Transformer using Focused Linear Attention</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/flatten-transformer-vision-transformer-using#code">1 code implementation</a> • <span class="item-conference-link"> <a href="/conference/iccv-2023-1"> ICCV 2023 </a> </span> • <span class="author-span "> <a href="/author/dongchen-han">Dongchen Han</a></span>, <span class="author-span "> <a href="/author/xuran-pan">Xuran Pan</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">The quadratic computation complexity of self-attention has been a persistent challenge when applying Transformer models to vision tasks.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/diversity"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Diversity</span> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 403</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/flatten-transformer-vision-transformer-using" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64
0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/flatten-transformer-vision-transformer-using#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2155585 --> <div class="col-lg-3 item-image-col"> <a href="/paper/dynamic-perceiver-for-efficient-visual"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/cc061e10-25b1-4f86-a080-1203f519993e.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/dynamic-perceiver-for-efficient-visual">Dynamic Perceiver for Efficient Visual Recognition</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/dynamic-perceiver-for-efficient-visual#code">1 code 
implementation</a> • <span class="item-conference-link"> <a href="/conference/iccv-2023-1"> ICCV 2023 </a> </span> • <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/dongchen-han">Dongchen Han</a></span>, <span class="author-span "> <a href="/author/zeyu-liu">Zeyu Liu</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/xuran-pan">Xuran Pan</a></span>, <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span "> <a href="/author/chao-deng">Chao Deng</a></span>, <span class="author-span "> <a href="/author/junlan-feng">Junlan Feng</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Early exits are placed exclusively within the classification branch, thus eliminating the need for linear separability in low-level features.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/action-recognition-in-videos"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000000145-670c75d8_lBJNcK5.jpg" alt=""> <span>Action Recognition</span> </span> </a> <a href="/task/classification-1"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/d0eafcb3-1a12-430b-8bb5-6f6bbff1a4b3.jpg" alt=""> <span>Classification</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/dynamic-perceiver-for-efficient-visual#tasks"> <span class="badge badge-primary"> <b>+4</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512"
viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 41</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/dynamic-perceiver-for-efficient-visual" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/dynamic-perceiver-for-efficient-visual#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item 
paper-card"> <!-- 2196455 --> <div class="col-lg-3 item-image-col"> <a href="/paper/adaptive-rotated-convolution-for-rotated"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/25244169-5b8e-40c9-b1eb-acdce2fc1415.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/adaptive-rotated-convolution-for-rotated">Adaptive Rotated Convolution for Rotated Object Detection</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/adaptive-rotated-convolution-for-rotated#code">1 code implementation</a> • <span class="item-conference-link"> <a href="/conference/iccv-2023-1"> ICCV 2023 </a> </span> • <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span "> <a href="/author/yiru-wang">Yiru Wang</a></span>, <span class="author-span "> <a href="/author/zhuofan-xia">Zhuofan Xia</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/weihao-gan">Weihao Gan</a></span>, <span class="author-span "> <a href="/author/zidong-wang">Zidong Wang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">In our ARC module, the convolution kernels rotate adaptively to extract object features with varying orientations in different images, and an efficient conditional computation mechanism is introduced to accommodate the large orientation variations of objects within an image.</p> <div class="sota"> <p> <a href="/sota/oriented-object-detection-on-dota-1-0"> <img style="height:20px;width:35px;position:relative;top:1px;" 
src="https://production-media.paperswithcode.com/sota-thumbs/oriented-object-detection-on-dota-1-0-small_ffa31d3c.png" alt="Oriented Object Detection on DOTA 1.0"/> </a> Ranked #3 on <a href="/sota/oriented-object-detection-on-dota-1-0"> Oriented Object Detection on DOTA 1.0 </a> </p> </div> <!-- The leaderboard thumbnail above is the only content of its link, so its alt names the link destination; the task thumbnails below are decorative (the task name follows inside the badge), hence their empty alt. --> <p> <a href="/task/arc"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>ARC</span> </span> </a> <a href="/task/object"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Object</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/adaptive-rotated-convolution-for-rotated#tasks"> <span class="badge badge-primary"> <b>+3</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 116</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/adaptive-rotated-convolution-for-rotated" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/adaptive-rotated-convolution-for-rotated#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion"
data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2187170 --> <div class="col-lg-3 item-image-col"> <a href="/paper/latency-aware-spatial-wise-dynamic-networks"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/29aa46d3-0d11-4520-b634-1cf930f445c2.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/latency-aware-spatial-wise-dynamic-networks">Latency-aware Spatial-wise Dynamic Networks</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/latency-aware-spatial-wise-dynamic-networks#code">2 code implementations</a> • <span class="author-name-text item-date-pub">12 Oct 2022</span> • <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/zhihang-yuan">Zhihang Yuan</a></span>, <span class="author-span "> <a href="/author/yifan-pu">Yifan 
Pu</a></span>, <span class="author-span "> <a href="/author/chenhao-xue">Chenhao Xue</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/guangyu-sun">Guangyu Sun</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">The latency prediction model can efficiently estimate the inference latency of dynamic networks by simultaneously considering algorithms, scheduling strategies, and hardware properties.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/image-classification"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/7a146e71-bbf8-4137-bf25-a3618bd043a0.jpg" alt=""> <span>Image Classification</span> </span> </a> <a href="/task/instance-segmentation"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000000003-d945c034_vTlkCrf.jpg" alt=""> <span>Instance Segmentation</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/latency-aware-spatial-wise-dynamic-networks#tasks"> <span class="badge badge-primary"> <b>+4</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 42</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/latency-aware-spatial-wise-dynamic-networks" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg"
width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/latency-aware-spatial-wise-dynamic-networks#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 2021815 --> <div class="col-lg-3 item-image-col"> <a href="/paper/learning-to-weight-samples-for-dynamic-early"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/9ff6d577-d97d-4f40-b13c-00a44203e2ba.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/learning-to-weight-samples-for-dynamic-early">Learning to Weight Samples for Dynamic 
Early-exiting Networks</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/learning-to-weight-samples-for-dynamic-early#code">1 code implementation</a> • <span class="author-name-text item-date-pub">17 Sep 2022</span> • <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/yifan-pu">Yifan Pu</a></span>, <span class="author-span "> <a href="/author/zihang-lai">Zihang Lai</a></span>, <span class="author-span "> <a href="/author/chaofei-wang">Chaofei Wang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/junfen-cao">Junfen Cao</a></span>, <span class="author-span "> <a href="/author/wenhui-huang">Wenhui Huang</a></span>, <span class="author-span "> <a href="/author/chao-deng">Chao Deng</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Intuitively, easy samples, which generally exit early in the network during inference, should contribute more to training early classifiers.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/meta-learning"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/task-0000001088-6b0b3a7f_0bh9941.jpg" alt=""> <span>Meta-Learning</span> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 32</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a
href="/paper/learning-to-weight-samples-for-dynamic-early" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/learning-to-weight-samples-for-dynamic-early#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- None --> <div class="col-lg-3 item-image-col"> <a href="/paper/towards-learning-spatially-discriminative"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2109.01359.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div 
class="col-lg-9 item-content"> <h1><a href="/paper/towards-learning-spatially-discriminative">CAM-loss: Towards Learning Spatially Discriminative Feature Representations</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/towards-learning-spatially-discriminative#code">no code implementations</a> • <span class="item-conference-link"> <a href="/conference/iccv-2021-1"> ICCV 2021 </a> </span> • <span class="author-span "> <a href="/author/chaofei-wang">Chaofei Wang</a></span>, <span class="author-span "> <a href="/author/jiayu-xiao">Jiayu Xiao</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/qisen-yang">Qisen Yang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">The backbone of traditional CNN classifier is generally considered as a feature extractor, followed by a linear layer which performs the classification.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/few-shot-learning"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Few-Shot Learning</span> </span> </a> <a href="/task/image-classification"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/thumbnails/task/7a146e71-bbf8-4137-bf25-a3618bd043a0.jpg" alt=""> <span>Image Classification</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/towards-learning-spatially-discriminative#tasks"> <span class="badge badge-primary"> <b>+2</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span> </div> <div class="entity" style="margin-bottom: 20px;"> <a
href="/paper/towards-learning-spatially-discriminative" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/towards-learning-spatially-discriminative#code" class="badge badge-dark badge-nocode "> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256 112v288m144-144H112"/></svg></span> Add Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 1770828 --> <div class="col-lg-3 item-image-col"> <a href="/paper/adaptive-focus-for-efficient-video"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/pgr-0001770828-39f0f852.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/adaptive-focus-for-efficient-video">Adaptive Focus for Efficient Video Recognition</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/adaptive-focus-for-efficient-video#code">1 code implementation</a> • <span class="item-conference-link"> <a href="/conference/iccv-2021-1"> ICCV 2021 </a> </span> • <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span>, <span class="author-span "> <a href="/author/zhaoxi-chen">Zhaoxi Chen</a></span>, <span class="author-span "> <a href="/author/haojun-jiang">Haojun Jiang</a></span>, <span class="author-span "> <a 
href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">In this paper, we explore the spatial redundancy in video recognition with the aim to improve the computational efficiency.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/computational-efficiency"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Computational Efficiency</span> </span> </a> <a href="/task/video-recognition"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Video Recognition</span> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 123</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/adaptive-focus-for-efficient-video" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/adaptive-focus-for-efficient-video#code" class="badge badge-dark "> <span
class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- None --> <div class="col-lg-3 item-image-col"> <a href="/paper/dynamic-neural-networks-a-survey"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/paper/2102.04906.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/dynamic-neural-networks-a-survey">Dynamic Neural Networks: A Survey</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/dynamic-neural-networks-a-survey#code">no code implementations</a> • <span class="author-name-text item-date-pub">9 Feb 2021</span> • <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a 
href="/author/le-yang">Le Yang</a></span>, <span class="author-span "> <a href="/author/honghui-wang">Honghui Wang</a></span>, <span class="author-span "> <a href="/author/yulin-wang">Yulin Wang</a></span> </p> <p class="item-strip-abstract">Dynamic neural network is an emerging research topic in deep learning.</p> <div class="sota"> </div> <!-- Task thumbnails are decorative (the task name follows inside the badge), hence the empty alt. --> <p> <a href="/task/computational-efficiency"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Computational Efficiency</span> </span> </a> <a href="/task/decision-making"> <span class="badge badge-primary"> <img src="https://production-media.paperswithcode.com/tasks/default.gif" alt=""> <span>Decision Making</span> </span> </a> <a style="position: relative; top: -2px;" href="/paper/dynamic-neural-networks-a-survey#tasks"> <span class="badge badge-primary"> <b>+1</b> </span> </a> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary" style="border:none;background-color:transparent"> </span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/dynamic-neural-networks-a-survey" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/dynamic-neural-networks-a-survey#code" class="badge badge-dark badge-nocode "> <span class=" icon-wrapper icon-ion" data-name="add"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path fill="none" stroke="#000" stroke-linecap="round" stroke-linejoin="round" stroke-width="32" d="M256
112v288m144-144H112"/></svg></span> Add Code </a> <br/> </div> </div> </div> </div> </div> <div class="row infinite-item item paper-card"> <!-- 1607909 --> <div class="col-lg-3 item-image-col"> <a href="/paper/resolution-adaptive-networks-for-efficient"> <div class="item-image" style="background-image: url('https://production-media.paperswithcode.com/thumbnails/papergithubrepo/pgr-0001607909-0b35cbc0.jpg');"> </div> </a> </div> <div class="col-lg-9 item-col"> <div class="row"> <div class="col-lg-9 item-content"> <h1><a href="/paper/resolution-adaptive-networks-for-efficient">Resolution Adaptive Networks for Efficient Inference</a></h1> <p class="author-section" style="padding-top:2px"> <a href="/paper/resolution-adaptive-networks-for-efficient#code">2 code implementations</a> • <span class="item-conference-link"> <a href="/conference/cvpr-2020-6"> CVPR 2020 </a> </span> • <span class="author-span "> <a href="/author/le-yang">Le Yang</a></span>, <span class="author-span author-matched"> <a href="/author/yizeng-han">Yizeng Han</a></span>, <span class="author-span "> <a href="/author/xi-chen">Xi Chen</a></span>, <span class="author-span "> <a href="/author/shiji-song">Shiji Song</a></span>, <span class="author-span "> <a href="/author/jifeng-dai">Jifeng Dai</a></span>, <span class="author-span "> <a href="/author/gao-huang">Gao Huang</a></span> </p> <p class="item-strip-abstract">Adaptive inference is an effective mechanism to achieve a dynamic tradeoff between accuracy and computational cost in deep networks.</p> <div class="sota"> </div> <p> </p> </div> <div class="col-lg-3 item-interact text-center"> <div class="entity-stars"> <span class="badge badge-secondary"><span class=" icon-wrapper icon-ion" data-name="star"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M394 480a16 16 0 0 1-9.39-3L256 383.76 127.39 477a16 16 0 0 1-24.55-18.08L153 310.35 23 221.2a16 16 0 0 1 9-29.2h160.38l48.4-148.95a16 16 0 0 1 30.44 0l48.4 
149H480a16 16 0 0 1 9.05 29.2L359 310.35l50.13 148.53A16 16 0 0 1 394 480z"/></svg></span> 149</span> </div> <div class="entity" style="margin-bottom: 20px;"> <a href="/paper/resolution-adaptive-networks-for-efficient" class="badge badge-light "> <span class=" icon-wrapper icon-ion" data-name="document"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M428 224H288a48 48 0 0 1-48-48V36a4 4 0 0 0-4-4h-92a64 64 0 0 0-64 64v320a64 64 0 0 0 64 64h224a64 64 0 0 0 64-64V228a4 4 0 0 0-4-4z"/><path d="M419.22 188.59L275.41 44.78a2 2 0 0 0-3.41 1.41V176a16 16 0 0 0 16 16h129.81a2 2 0 0 0 1.41-3.41z"/></svg></span> Paper </a> <br/> <a href="/paper/resolution-adaptive-networks-for-efficient#code" class="badge badge-dark "> <span class=" icon-wrapper icon-ion" data-name="logo-github"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M256 32C132.3 32 32 134.9 32 261.7c0 101.5 64.2 187.5 153.2 217.9a17.56 17.56 0 0 0 3.8.4c8.3 0 11.5-6.1 11.5-11.4 0-5.5-.2-19.9-.3-39.1a102.4 102.4 0 0 1-22.6 2.7c-43.1 0-52.9-33.5-52.9-33.5-10.2-26.5-24.9-33.6-24.9-33.6-19.5-13.7-.1-14.1 1.4-14.1h.1c22.5 2 34.3 23.8 34.3 23.8 11.2 19.6 26.2 25.1 39.6 25.1a63 63 0 0 0 25.6-6c2-14.8 7.8-24.9 14.2-30.7-49.7-5.8-102-25.5-102-113.5 0-25.1 8.7-45.6 23-61.6-2.3-5.8-10-29.2 2.2-60.8a18.64 18.64 0 0 1 5-.5c8.1 0 26.4 3.1 56.6 24.1a208.21 208.21 0 0 1 112.2 0c30.2-21 48.5-24.1 56.6-24.1a18.64 18.64 0 0 1 5 .5c12.2 31.6 4.5 55 2.2 60.8 14.3 16.1 23 36.6 23 61.6 0 88.2-52.4 107.6-102.3 113.3 8 7.1 15.2 21.1 15.2 42.5 0 30.7-.3 55.5-.3 63 0 5.4 3.1 11.5 11.4 11.5a19.35 19.35 0 0 0 4-.4C415.9 449.2 480 363.1 480 261.7 480 134.9 379.7 32 256 32z"/></svg></span> Code </a> <br/> </div> </div> </div> </div> </div> </div> <div class="loading" style="display: none;"> <div class="loader-ellips infinite-scroll-request"> <span class="loader-ellips__dot"></span> <span class="loader-ellips__dot"></span> <span 
class="loader-ellips__dot"></span> <span class="loader-ellips__dot"></span> </div> </div> <div class="search-submit-paper text-center" style="font-size:16px;padding-bottom:30px;"> Cannot find the paper you are looking for? You can <a href="/submit-paper">Submit</a> a new open access paper. </div> </div> </div> <div class="footer"> <div class="footer-contact"> <span class="footer-contact-item">Contact us on:</span> <a class="footer-contact-item" href="mailto:hello@paperswithcode.com"> <span class=" icon-wrapper icon-ion" data-name="mail"><svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><path d="M424 80H88a56.06 56.06 0 0 0-56 56v240a56.06 56.06 0 0 0 56 56h336a56.06 56.06 0 0 0 56-56V136a56.06 56.06 0 0 0-56-56zm-14.18 92.63l-144 112a16 16 0 0 1-19.64 0l-144-112a16 16 0 1 1 19.64-25.26L256 251.73l134.18-104.36a16 16 0 0 1 19.64 25.26z"/></svg></span> hello@paperswithcode.com </a>. <span class="footer-contact-item"> Papers With Code is a free resource with all data licensed under <a rel="noreferrer" href="https://creativecommons.org/licenses/by-sa/4.0/">CC-BY-SA</a>. 
</span> </div> <div class="footer-links"> <a href="/site/terms">Terms</a> <a href="/site/data-policy">Data policy</a> <a href="/site/cookies-policy">Cookies policy</a> <a href="/about#team" class="fair-logo"> from <img src=""> </a> </div> </div>
<script>
  /*
   * Paper-list helpers.
   *
   * Only block comments are used in these inline scripts on purpose: the page
   * may be served with its whitespace collapsed onto a single line, and a
   * line comment would then comment out all of the code that follows it.
   */

  /* Infinite scroll: show the ".loading" indicator while the next page of
     results is being fetched and hide it once the new items are in place. */
  run_after_frontend_loaded.push(() => {
    new Waypoint.Infinite({
      element: $('.infinite-container')[0],
      onBeforePageLoad: function () {
        $('.loading').show();
      },
      onAfterPageLoad: function () {
        $('.loading').hide();
      }
    });
  });

  /**
   * Show or hide (animated) the element "#tweets-<paper_id>".
   *
   * @param {string|number} paper_id  Suffix used to build the element id.
   */
  function toggleTweets(paper_id) {
    $("#tweets-" + paper_id).toggle("slow");
  }

  /**
   * Fade out "#see-more-<paper_id>" and lift the max-height cap on
   * "#tweets-<paper_id>".
   *
   * Safe to call when either element is missing: jQuery setters are no-ops on
   * an empty selection, whereas indexing the raw DOM node would throw.
   *
   * @param {string|number} paper_id  Suffix used to build the element ids.
   */
  function fullHeight(paper_id) {
    $("#see-more-" + paper_id).fadeOut();
    $("#tweets-" + paper_id).css("max-height", "9999px");
  }
</script>
<script>
  /*
   * Global search box: categorised autocomplete plus CSRF wiring for jQuery
   * AJAX requests. Block comments only (see the note in the script above).
   */
  run_after_frontend_loaded.push(() => {
    $(function () {
      /* Upper bounds applied to the suggestion list. */
      var MAX_SUGGESTIONS = 12;
      var MAX_PER_CATEGORY = 5;
      var DEFAULT_TASK_IMAGE = "https://production-media.paperswithcode.com/tasks/default.gif";

      /* Replaces the Bootstrap modal focus hook with a no-op.
         NOTE(review): presumably so the autocomplete menu can take focus
         while a modal is open - confirm before removing. */
      $.fn.modal.Constructor.prototype._enforceFocus = function () {};

      /**
       * Turn the autocomplete API payload into the flat item list expected by
       * the catcomplete widget.
       *
       * Results are taken round-robin (one per category per round, at most
       * MAX_PER_CATEGORY rounds) until MAX_SUGGESTIONS items were collected,
       * then returned grouped by category in the order Tasks, Leaderboards,
       * Papers, Datasets, Methods. Missing result lists are treated as empty.
       * Paper items carry no "image" key, so they render without a thumbnail.
       *
       * @param {Object} data  Response of /api/search-autocomplete/.
       * @returns {Array<Object>} Items with label, category, meta and, except
       *                          for papers, image.
       */
      function buildSearchItems(data) {
        var groups = [
          {
            rows: data.tasks || [],
            picked: [],
            toItem: function (row) {
              return { label: row.name, image: row.image, category: "Tasks", meta: null };
            }
          },
          {
            rows: data.leaderboards || [],
            picked: [],
            toItem: function (row) {
              return { label: row.name, image: row.image, category: "Leaderboards", meta: row.slug };
            }
          },
          {
            rows: data.papers || [],
            picked: [],
            toItem: function (row) {
              return { label: row.title, category: "Papers", meta: null };
            }
          },
          {
            rows: data.datasets || [],
            picked: [],
            toItem: function (row) {
              return { label: row.name, image: row.image, category: "Datasets", meta: row.slug };
            }
          },
          {
            rows: data.methods || [],
            picked: [],
            toItem: function (row) {
              return { label: row.name, image: row.image, category: "Methods", meta: null };
            }
          }
        ];

        var total = 0;
        for (var round = 0; round < MAX_PER_CATEGORY && total < MAX_SUGGESTIONS; round++) {
          for (var g = 0; g < groups.length; g++) {
            var group = groups[g];
            if (round < group.rows.length && total < MAX_SUGGESTIONS) {
              group.picked.push(group.toItem(group.rows[round]));
              total++;
            }
          }
        }

        var items = [];
        for (var k = 0; k < groups.length; k++) {
          items = items.concat(groups[k].picked);
        }
        return items;
      }

      $.widget("custom.catcomplete", $.ui.autocomplete, {
        _create: function () {
          this._super();
          /* Category headers are not selectable menu items. */
          this.widget().menu("option", "items", "> :not(.ui-autocomplete-category)");
        },

        /** Overrides the _renderItem method in jquery to allow for search
            result images and icons **/
        _renderItem: function (ul, item) {
          var row = $("<div>").text(item.label);

          /** If we have an image in the search item then render it; if no
              task image available, use default **/
          if ("image" in item) {
            var image_url = item.image || DEFAULT_TASK_IMAGE;
            /* The URL comes from the API response: set it as an attribute
               instead of concatenating it into markup. The thumbnail is
               decorative next to the label, hence the empty alt. */
            row.prepend($("<img>").attr({ src: image_url, alt: "" }));
          }

          return $("<li>").append(row).appendTo(ul);
        },

        /* Renders the items in order and inserts a header row whenever the
           category changes. */
        _renderMenu: function (ul, items) {
          var that = this;
          var currentCategory = "";

          $.each(items, function (_, item) {
            if (item.category !== currentCategory) {
              $("<li>")
                .addClass("ui-autocomplete-category")
                .text(item.category)
                .appendTo(ul);
              currentCategory = item.category;
            }

            var li = that._renderItemData(ul, item);
            /* "!= null" also skips undefined (e.g. a result without slug). */
            if (item.meta != null) {
              li.attr("data-qmeta", item.meta);
            }
            if (item.category) {
              li.attr("aria-label", item.category + " : " + item.label);
            }
          });
        }
      });

      $("#id_global_search_input").catcomplete({
        minLength: 2,

        /* The widget waits for response() after every request, so it is
           called on failure as well (with no suggestions). */
        source: function (request, response) {
          $.get("/api/search-autocomplete/", { "q": request.term })
            .done(function (data) {
              response(buildSearchItems(data || {}));
            })
            .fail(function () {
              response([]);
            });
        },

        /* Copies the chosen suggestion into the search form and submits it. */
        select: function (event, ui) {
          var $meta = $("#q_meta");

          $("#id_global_search_input").val(ui.item.label);

          if (typeof gtag !== 'undefined') {
            gtag('event', 'SiteActions', {
              'event_category': 'Search',
              'event_label': ui.item.category,
            });
          }

          if (ui.item.meta == null) {
            /* Without a name attribute the field is left out of the
               submitted form data. */
            $meta.val('');
            $meta.removeAttr('name');
          } else {
            if (!$meta.attr("name")) {
              $meta.attr('name', 'q_meta');
            }
            $meta.val(ui.item.meta);
          }

          $('#q_type').val(ui.item.category.toLowerCase());
          $("#id_global_search_form").submit();
        }
      });

      /* Use the short placeholder for window widths between 992 and 1200
         (exclusive). */
      var windowWidth = $(window).width();
      if (windowWidth < 1200 && windowWidth > 992) {
        $("#id_global_search_input").attr("placeholder", "Search");
      }

      /* Setup csrf token for ajax requests */

      /**
       * Read a cookie by name.
       *
       * @param {string} name  Cookie name.
       * @returns {string|null} URI-decoded value, or null when not present.
       */
      let getCookie = (name) => {
        var cookieValue = null;
        if (document.cookie && document.cookie !== '') {
          var cookies = document.cookie.split(';');
          for (var i = 0; i < cookies.length; i++) {
            var cookie = cookies[i].trim();
            /* Does this cookie string begin with the name we want? */
            if (cookie.substring(0, name.length + 1) === (name + '=')) {
              cookieValue = decodeURIComponent(cookie.substring(name.length + 1));
              break;
            }
          }
        }
        return cookieValue;
      };

      let csrftoken = getCookie('csrftoken');

      /* Make sure we use the most up-to-date CSRF token. When the cookie is
         not available the values already present in the forms are kept
         instead of being blanked. */
      if (csrftoken) {
        $("input[name='csrfmiddlewaretoken']").val(csrftoken);
      }

      /* these HTTP methods do not require CSRF protection */
      function csrfSafeMethod(method) {
        return (/^(GET|HEAD|OPTIONS|TRACE)$/.test(method));
      }

      $.ajaxSetup({
        beforeSend: function (xhr, settings) {
          /* Only same-origin, state-changing requests get the header. */
          if (csrftoken && !csrfSafeMethod(settings.type) && !this.crossDomain) {
            xhr.setRequestHeader("X-CSRFToken", csrftoken);
          }
        }
      });
    });
  });
</script>
<!-- Minified chunk-loader runtime, reproduced verbatim: it registers the global webpackJsonpfrontend callback and lazy-loads static/js and static/css chunks from production-assets.paperswithcode.com. Presumably build output; regenerate it instead of editing it by hand. -->
<script>!function(e){function t(t){for(var n,a,s=t[0],u=t[1],f=t[2],i=0,d=[];i<s.length;i++)a=s[i],Object.prototype.hasOwnProperty.call(o,a)&&o[a]&&d.push(o[a][0]),o[a]=0;for(n in u)Object.prototype.hasOwnProperty.call(u,n)&&(e[n]=u[n]);for(l&&l(t);d.length;)d.shift()();return c.push.apply(c,f||[]),r()}function r(){for(var e,t=0;t<c.length;t++){for(var r=c[t],n=!0,a=1;a<r.length;a++){var u=r[a];0!==o[u]&&(n=!1)}n&&(c.splice(t--,1),e=s(s.s=r[0]))}return e}var n={},a={11:0},o={11:0},c=[];function s(t){if(n[t])return n[t].exports;var r=n[t]={i:t,l:!1,exports:{}};return e[t].call(r.exports,r,r.exports,s),r.l=!0,r.exports}s.e=function(e){var t=[];a[e]?t.push(a[e]):0!==a[e]&&{2:1,3:1,5:1,6:1,8:1,9:1,10:1}[e]&&t.push(a[e]=new Promise((function(t,r){for(var
n="static/css/"+({4:"chart",5:"conference-page",6:"example-page",8:"newsletters-create-page",9:"newsletters-edit-page",10:"newsletters-list-page",12:"table"}[e]||e)+"."+{0:"31d6cfe0",1:"31d6cfe0",2:"5745a9fd",3:"05600cd7",4:"31d6cfe0",5:"67565070",6:"8444f163",8:"f8a273b3",9:"f8a273b3",10:"db3e0a85",12:"31d6cfe0",14:"31d6cfe0",15:"31d6cfe0"}[e]+".chunk.css",o=s.p+n,c=document.getElementsByTagName("link"),u=0;u<c.length;u++){var f=(l=c[u]).getAttribute("data-href")||l.getAttribute("href");if("stylesheet"===l.rel&&(f===n||f===o))return t()}var i=document.getElementsByTagName("style");for(u=0;u<i.length;u++){var l;if((f=(l=i[u]).getAttribute("data-href"))===n||f===o)return t()}var d=document.createElement("link");d.rel="stylesheet",d.type="text/css",d.onload=t,d.onerror=function(t){var n=t&&t.target&&t.target.src||o,c=new Error("Loading CSS chunk "+e+" failed.\n("+n+")");c.code="CSS_CHUNK_LOAD_FAILED",c.request=n,delete a[e],d.parentNode.removeChild(d),r(c)},d.href=o,document.getElementsByTagName("head")[0].appendChild(d)})).then((function(){a[e]=0})));var r=o[e];if(0!==r)if(r)t.push(r[2]);else{var n=new Promise((function(t,n){r=o[e]=[t,n]}));t.push(r[2]=n);var c,u=document.createElement("script");u.charset="utf-8",u.timeout=120,s.nc&&u.setAttribute("nonce",s.nc),u.src=function(e){return s.p+"static/js/"+({4:"chart",5:"conference-page",6:"example-page",8:"newsletters-create-page",9:"newsletters-edit-page",10:"newsletters-list-page",12:"table"}[e]||e)+"."+{0:"041a0327",1:"eb8f85bf",2:"57df0e43",3:"dd682e9c",4:"934a42ca",5:"ddc33be8",6:"f5234ef0",8:"c76f72bd",9:"aa24afbf",10:"a749f71a",12:"c5756280",14:"be7b1031",15:"b8393014"}[e]+".chunk.js"}(e);var f=new Error;c=function(t){u.onerror=u.onload=null,clearTimeout(i);var r=o[e];if(0!==r){if(r){var n=t&&("load"===t.type?"missing":t.type),a=t&&t.target&&t.target.src;f.message="Loading chunk "+e+" failed.\n("+n+": "+a+")",f.name="ChunkLoadError",f.type=n,f.request=a,r[1](f)}o[e]=void 0}};var
i=setTimeout((function(){c({type:"timeout",target:u})}),12e4);u.onerror=u.onload=c,document.head.appendChild(u)}return Promise.all(t)},s.m=e,s.c=n,s.d=function(e,t,r){s.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:r})},s.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},s.t=function(e,t){if(1&t&&(e=s(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var r=Object.create(null);if(s.r(r),Object.defineProperty(r,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var n in e)s.d(r,n,function(t){return e[t]}.bind(null,n));return r},s.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return s.d(t,"a",t),t},s.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},s.p="https://production-assets.paperswithcode.com/",s.oe=function(e){throw console.error(e),e};var u=this.webpackJsonpfrontend=this.webpackJsonpfrontend||[],f=u.push.bind(u);u.push=t,u=u.slice();for(var i=0;i<u.length;i++)t(u[i]);var l=f;r()}([])</script><script src="https://production-assets.paperswithcode.com/static/js/13.aa3fa037.chunk.js"></script><script src="https://production-assets.paperswithcode.com/static/js/main.99ee382b.chunk.js"></script> </body> </html>