Image To Image Translation

What is Image To Image Translation?
Image-to-image translation is the process of converting an image from one domain to another using deep learning techniques.
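
For orientation, the sketch below shows roughly what the smallest possible image-to-image translation model looks like in PyTorch: a convolutional encoder-decoder with one skip connection that maps a source-domain image tensor to a target-domain image of the same size. This is an illustrative assumption for this topic page, not code from any paper listed below; the class name TinyTranslator and all layer sizes are made up.

```python
# Minimal sketch of an image-to-image translation generator (illustrative only;
# not taken from any paper on this page). Assumes 3-channel inputs, e.g. 256x256 RGB.
import torch
import torch.nn as nn

class TinyTranslator(nn.Module):
    """Encoder-decoder with one skip connection, mapping source-domain images
    to target-domain images of the same spatial size."""
    def __init__(self, in_ch=3, out_ch=3, width=64):
        super().__init__()
        self.enc1 = nn.Sequential(nn.Conv2d(in_ch, width, 4, 2, 1), nn.ReLU(inplace=True))
        self.enc2 = nn.Sequential(nn.Conv2d(width, width * 2, 4, 2, 1), nn.ReLU(inplace=True))
        self.dec1 = nn.Sequential(nn.ConvTranspose2d(width * 2, width, 4, 2, 1), nn.ReLU(inplace=True))
        self.dec2 = nn.ConvTranspose2d(width * 2, out_ch, 4, 2, 1)  # skip concat -> 2*width channels

    def forward(self, x):
        e1 = self.enc1(x)                      # H/2
        e2 = self.enc2(e1)                     # H/4
        d1 = self.dec1(e2)                     # back to H/2
        out = self.dec2(torch.cat([d1, e1], dim=1))
        return torch.tanh(out)                 # target-domain image scaled to [-1, 1]

if __name__ == "__main__":
    g = TinyTranslator()
    fake_target = g(torch.randn(1, 3, 256, 256))
    print(fake_target.shape)  # torch.Size([1, 3, 256, 256])
```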

Papers and Code

LaPIG: Cross-Modal Generation of Paired Thermal and Visible Facial Images
Mar 20, 2025
Authors: Leyang Wang, Joice Lin
Abstract: The success of modern machine learning, particularly in facial translation networks, is highly dependent on the availability of high-quality, paired, large-scale datasets. However, acquiring sufficient data is often challenging and costly. Inspired by the recent success of diffusion models in high-quality image synthesis and advancements in Large Language Models (LLMs), we propose a novel framework called LLM-assisted Paired Image Generation (LaPIG). This framework enables the construction of comprehensive, high-quality paired visible and thermal images using captions generated by LLMs. Our method encompasses three parts: visible image synthesis with ArcFace embedding, thermal image translation using Latent Diffusion Models (LDMs), and caption generation with LLMs. Our approach not only generates multi-view paired visible and thermal images to increase data diversity but also produces high-quality paired data while maintaining their identity information. We evaluate our method on public datasets by comparing it with existing methods, demonstrating the superiority of LaPIG.
Via arXiv: http://arxiv.org/abs/2503.16376

Single-Step Bidirectional Unpaired Image Translation Using Implicit Bridge Consistency Distillation
Mar 19, 2025
Authors: Suhyeon Lee, Kwanyoung Kim, Jong Chul Ye
Abstract: Unpaired image-to-image translation has seen significant progress since the introduction of CycleGAN. However, methods based on diffusion models or Schrödinger bridges have yet to be widely adopted in real-world applications due to their iterative sampling nature. To address this challenge, we propose a novel framework, Implicit Bridge Consistency Distillation (IBCD), which enables single-step bidirectional unpaired translation without using adversarial loss. IBCD extends consistency distillation by using a diffusion implicit bridge model that connects PF-ODE trajectories between distributions. Additionally, we introduce two key improvements: 1) distribution matching for consistency distillation and 2) an adaptive weighting method based on distillation difficulty. Experimental results demonstrate that IBCD achieves state-of-the-art performance on benchmark datasets in a single generation step. Project page available at https://hyn2028.github.io/project_page/IBCD/index.html
* 25 pages, 16 figures
Via arXiv: http://arxiv.org/abs/2503.15056
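
The practical payoff claimed here is replacing an iterative bridge/diffusion sampler with a single forward pass of a distilled student. The sketch below contrasts the two inference patterns at a schematic level; the Translator network and the step rule are illustrative placeholders, not IBCD's actual models or PF-ODE solver.

```python
# Sketch (assumption, not the authors' implementation) of why single-step
# distillation matters at inference time: an iterative bridge sampler needs many
# network calls per image, while a distilled consistency model needs one.
import torch
import torch.nn as nn

class Translator(nn.Module):
    """Placeholder network standing in for either the bridge model or its distilled student."""
    def __init__(self, ch=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(ch + 1, 32, 3, padding=1), nn.ReLU(), nn.Conv2d(32, ch, 3, padding=1)
        )

    def forward(self, x, t):
        t_map = torch.full_like(x[:, :1], float(t))   # broadcast the step index as an extra channel
        return self.net(torch.cat([x, t_map], dim=1))

@torch.no_grad()
def iterative_translate(model, x_src, steps=50):
    """Many small refinement calls along the bridge trajectory (schematic)."""
    x = x_src
    for t in reversed(range(steps)):
        x = x + model(x, t / steps) / steps
    return x

@torch.no_grad()
def single_step_translate(student, x_src):
    """One forward pass of the distilled student, as in single-step translation."""
    return student(x_src, 0.0)

if __name__ == "__main__":
    x = torch.randn(1, 3, 64, 64)
    teacher, student = Translator(), Translator()
    print(iterative_translate(teacher, x).shape, single_step_translate(student, x).shape)
```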

OpenMIBOOD: Open Medical Imaging Benchmarks for Out-Of-Distribution Detection
Mar 20, 2025
Authors: Max Gutbrod, David Rauber, Danilo Weber Nunes, Christoph Palm
Abstract: The growing reliance on Artificial Intelligence (AI) in critical domains such as healthcare demands robust mechanisms to ensure the trustworthiness of these systems, especially when faced with unexpected or anomalous inputs. This paper introduces the Open Medical Imaging Benchmarks for Out-Of-Distribution Detection (OpenMIBOOD), a comprehensive framework for evaluating out-of-distribution (OOD) detection methods specifically in medical imaging contexts. OpenMIBOOD includes three benchmarks from diverse medical domains, encompassing 14 datasets divided into covariate-shifted in-distribution, near-OOD, and far-OOD categories. We evaluate 24 post-hoc methods across these benchmarks, providing a standardized reference to advance the development and fair comparison of OOD detection methods. Results reveal that findings from broad-scale OOD benchmarks in natural image domains do not translate to medical applications, underscoring the critical need for such benchmarks in the medical field. By mitigating the risk of exposing AI models to inputs outside their training distribution, OpenMIBOOD aims to support the advancement of reliable and trustworthy AI systems in healthcare. The repository is available at https://github.com/remic-othr/OpenMIBOOD.
Via arXiv: http://arxiv.org/abs/2503.16247
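
As a generic illustration of what a "post-hoc" OOD detector means in this context, the sketch below scores inputs with maximum softmax probability, a common baseline that operates only on a frozen classifier's outputs. It is not claimed to be OpenMIBOOD code; the classifier and threshold are stand-ins.

```python
# Generic illustration of a post-hoc OOD score (maximum softmax probability).
# This is a common baseline for the family of methods benchmarked here; it is not
# OpenMIBOOD code, and the classifier below is a stand-in.
import torch
import torch.nn as nn
import torch.nn.functional as F

def msp_score(logits: torch.Tensor) -> torch.Tensor:
    """Higher score = more in-distribution; inputs below a threshold are flagged as OOD."""
    return F.softmax(logits, dim=-1).max(dim=-1).values

if __name__ == "__main__":
    classifier = nn.Linear(16, 5)          # stand-in for a trained medical-imaging classifier head
    features = torch.randn(4, 16)          # stand-in for extracted image features
    scores = msp_score(classifier(features))
    threshold = 0.5                        # in practice chosen on held-out in-distribution data
    print(scores, scores < threshold)      # True marks a candidate OOD input
```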
Whole-Body Image-to-Image Translation for a Virtual Scanner in a Healthcare Digital Twin
Mar 18, 2025
Authors: Valerio Guarrasi, Francesco Di Feola, Rebecca Restivo, Lorenzo Tronchin, Paolo Soda
Abstract: Generating positron emission tomography (PET) images from computed tomography (CT) scans via deep learning offers a promising pathway to reduce the radiation exposure and costs associated with PET imaging, improving patient care and accessibility to functional imaging. Whole-body image translation presents challenges due to anatomical heterogeneity, often limiting generalized models. We propose a framework that segments whole-body CT images into four regions (head, trunk, arms, and legs) and uses district-specific Generative Adversarial Networks (GANs) for tailored CT-to-PET translation. Synthetic PET images from each region are stitched together to reconstruct the whole-body scan. We compare against a baseline non-segmented GAN and test Pix2Pix and CycleGAN architectures in both paired and unpaired scenarios. Quantitative evaluations at the district, whole-body, and lesion levels demonstrate significant improvements with our district-specific GANs; Pix2Pix yields superior metrics, ensuring precise, high-quality image synthesis. By addressing anatomical heterogeneity, this approach achieves state-of-the-art results in whole-body CT-to-PET translation. This methodology supports healthcare Digital Twins by enabling accurate virtual PET scans from CT data, creating virtual imaging representations to monitor, predict, and optimize health outcomes.
Via arXiv
Access Paper or Ask Questions: /paper/whole-body-image-to-image-translation-for-a
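The abstract's pipeline, translating each body district with its own generator and stitching the regional outputs back together, can be sketched as plain control flow. The district labels, the `translate_whole_body` helper, and the identity "generators" below are hypothetical stand-ins, not the authors' implementation.

```python
import numpy as np

# Hypothetical district labels in a whole-body segmentation mask.
DISTRICTS = {"head": 1, "trunk": 2, "arms": 3, "legs": 4}

def translate_whole_body(ct_volume, district_mask, generators):
    """Apply one district-specific generator per region and stitch the results.

    generators: dict mapping district name -> callable(ct_region) -> pet_region,
    e.g. district-specific Pix2Pix models (placeholders here).
    """
    pet_volume = np.zeros_like(ct_volume, dtype=np.float32)
    for name, label in DISTRICTS.items():
        region = district_mask == label                       # boolean mask for this district
        if not region.any():
            continue
        synthetic = generators[name](ct_volume * region)      # translate the masked CT
        pet_volume[region] = synthetic[region]                # stitch the region back in place
    return pet_volume

# Toy usage with identity "generators" just to exercise the stitching logic.
ct = np.random.rand(8, 64, 64).astype(np.float32)
mask = np.random.randint(1, 5, size=ct.shape)
gens = {name: (lambda x: x) for name in DISTRICTS}
pet = translate_whole_body(ct, mask, gens)
```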
Ultrasound Image-to-Video Synthesis via Latent Dynamic Diffusion Models
Mar 19, 2025
Authors: Tingxiu Chen, Yilei Shi, Zixuan Zheng, Bingcong Yan, Jingliang Hu, Xiao Xiang Zhu, Lichao Mou
Abstract: Ultrasound video classification enables automated diagnosis and has emerged as an important research area. However, publicly available ultrasound video datasets remain scarce, hindering progress in developing effective video classification models. We propose addressing this shortage by synthesizing plausible ultrasound videos from readily available, abundant ultrasound images. To this end, we introduce a latent dynamic diffusion model (LDDM) to efficiently translate static images to dynamic sequences with realistic video characteristics. We demonstrate strong quantitative results and visually appealing synthesized videos on the BUSV benchmark. Notably, training video classification models on combinations of real and LDDM-synthesized videos substantially improves performance over using real data alone, indicating our method successfully emulates dynamics critical for discrimination. Our image-to-video approach provides an effective data augmentation solution to advance ultrasound video analysis. Code is available at https://github.com/MedAITech/U_I2V.
* MICCAI 2024
Via arXiv
Access Paper or Ask Questions: /paper/ultrasound-image-to-video-synthesis-via
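The reported gains come from training video classifiers on a mix of real and LDDM-synthesized clips. A minimal PyTorch sketch of that augmentation setup follows; the tensor shapes and random placeholder data are assumptions, and a real pipeline would load clips from disk rather than sample them.

```python
import torch
from torch.utils.data import ConcatDataset, DataLoader, TensorDataset

# Placeholder tensors standing in for real and LDDM-synthesized ultrasound clips
# with shape (num_clips, frames, channels, height, width) and binary labels.
real_clips = torch.randn(100, 16, 1, 64, 64)
real_labels = torch.randint(0, 2, (100,))
synth_clips = torch.randn(300, 16, 1, 64, 64)
synth_labels = torch.randint(0, 2, (300,))

train_set = ConcatDataset([
    TensorDataset(real_clips, real_labels),
    TensorDataset(synth_clips, synth_labels),   # synthetic clips used as augmentation
])
loader = DataLoader(train_set, batch_size=8, shuffle=True)

for clips, labels in loader:
    # feed `clips` to any video classifier, e.g. a 3D CNN (not shown here)
    break
```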
ARC: Anchored Representation Clouds for High-Resolution INR Classification
Mar 19, 2025
Authors: Joost Luijmes, Alexander Gielisse, Roman Knyazhitskiy, Jan van Gemert
Abstract: Implicit neural representations (INRs) encode signals in neural network weights as a memory-efficient representation, decoupling sampling resolution from the associated resource costs. Current INR image classification methods are demonstrated on low-resolution data and are sensitive to image-space transformations. We attribute these issues to the global, fully connected MLP architecture of current INRs, which lacks mechanisms for local representation: MLPs are sensitive to absolute image location and struggle with high-frequency details. We propose ARC: Anchored Representation Clouds, a novel INR architecture that explicitly anchors latent vectors locally in image space. By introducing spatial structure to the latent vectors, ARC captures local image data, which in our testing leads to state-of-the-art implicit image classification of both low- and high-resolution images and increased robustness against image-space translation. Code can be found at https://github.com/JLuij/anchored_representation_clouds.
* Accepted at the ICLR 2025 Workshop on Neural Network Weights as a New Data Modality
Via arXiv
Access Paper or Ask Questions: /paper/arc-anchored-representation-clouds-for-high
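The abstract's central idea, latent vectors anchored at image-space locations so each query pixel draws on nearby anchors rather than one global code, can be illustrated roughly as below. The inverse-distance weighting over the k nearest anchors is our own simplification, not necessarily the interpolation scheme ARC actually uses.

```python
import numpy as np

def local_latent(query_xy, anchor_xy, anchor_latents, k=4, eps=1e-6):
    """Gather a latent code for a query pixel from its k nearest anchors.

    query_xy:       (2,) pixel coordinates in [0, 1]^2
    anchor_xy:      (N, 2) anchor positions
    anchor_latents: (N, D) latent vectors stored at the anchors
    """
    d = np.linalg.norm(anchor_xy - query_xy, axis=1)            # distances to all anchors
    nearest = np.argsort(d)[:k]
    w = 1.0 / (d[nearest] + eps)                                # inverse-distance weights
    w /= w.sum()
    return (w[:, None] * anchor_latents[nearest]).sum(axis=0)   # (D,) interpolated latent

# Toy usage: 64 anchors with 16-dimensional latents, queried at one pixel location.
rng = np.random.default_rng(0)
anchors = rng.random((64, 2))
latents = rng.standard_normal((64, 16))
z = local_latent(np.array([0.3, 0.7]), anchors, latents)
```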
ShapeShift: Towards Text-to-Shape Arrangement Synthesis with Content-Aware Geometric Constraints
Mar 18, 2025
Authors: Vihaan Misra, Peter Schaldenbrand, Jean Oh
Abstract: While diffusion-based models excel at generating photorealistic images from text, a more nuanced challenge emerges when constrained to using only a fixed set of rigid shapes, akin to solving tangram puzzles or arranging real-world objects to match semantic descriptions. We formalize this problem as shape-based image generation, a new text-guided image-to-image translation task that requires rearranging the input set of rigid shapes into non-overlapping configurations and visually communicating the target concept. Unlike pixel-manipulation approaches, our method, ShapeShift, explicitly parameterizes each shape within a differentiable vector graphics pipeline, iteratively optimizing placement and orientation through score distillation sampling from pretrained diffusion models. To preserve arrangement clarity, we introduce a content-aware collision resolution mechanism that applies minimal semantically coherent adjustments when overlaps occur, ensuring smooth convergence toward physically valid configurations. By bridging diffusion-based semantic guidance with explicit geometric constraints, our approach yields interpretable compositions where spatial relationships clearly embody the textual prompt. Extensive experiments demonstrate compelling results across diverse scenarios, with quantitative and qualitative advantages over alternative techniques.
Via arXiv
Access Paper or Ask Questions: /paper/shapeshift-towards-text-to-shape-arrangement
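One piece of this description that lends itself to a small illustration is collision resolution: when shapes overlap during optimization, apply a minimal adjustment that separates them. The sketch below does this for circular proxies of shape footprints; the circular approximation and the fixed-point iteration are our assumptions, not the paper's content-aware mechanism.

```python
import numpy as np

def resolve_overlaps(centers, radii, iters=10):
    """Push apart any pair of overlapping circles by the minimal separating offset."""
    centers = centers.astype(np.float64).copy()
    for _ in range(iters):
        moved = False
        for i in range(len(centers)):
            for j in range(i + 1, len(centers)):
                delta = centers[j] - centers[i]
                dist = np.linalg.norm(delta)
                overlap = radii[i] + radii[j] - dist
                if overlap > 0:
                    # split the minimal separating displacement between both shapes
                    direction = delta / dist if dist > 1e-9 else np.array([1.0, 0.0])
                    centers[i] -= 0.5 * overlap * direction
                    centers[j] += 0.5 * overlap * direction
                    moved = True
        if not moved:
            break
    return centers

# Toy usage: three partially overlapping circular proxies.
c = resolve_overlaps(np.array([[0.0, 0.0], [0.5, 0.0], [0.2, 0.3]]),
                     np.array([0.4, 0.4, 0.3]))
```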
Image as an IMU: Estimating Camera Motion from a Single Motion-Blurred Image
Mar 21, 2025
Authors: Jerred Chen, Ronald Clark
Abstract: In many robotics and VR/AR applications, fast camera motions cause severe motion blur, which makes existing camera pose estimation methods fail. In this work, we propose a novel framework that leverages motion blur as a rich cue for motion estimation rather than treating it as an unwanted artifact. Our approach works by predicting a dense motion flow field and a monocular depth map directly from a single motion-blurred image. We then recover the instantaneous camera velocity by solving a linear least squares problem under the small motion assumption. In essence, our method produces an IMU-like measurement that robustly captures fast and aggressive camera movements. To train our model, we construct a large-scale dataset with realistic synthetic motion blur derived from ScanNet++v2 and further refine our model by training end-to-end on real data using our fully differentiable pipeline. Extensive evaluations on real-world benchmarks demonstrate that our method achieves state-of-the-art angular and translational velocity estimates, outperforming current methods like MASt3R and COLMAP.
* Project page: https://jerredchen.github.io/image-as-imu/
Via arXiv
Access Paper or Ask Questions: /paper/image-as-an-imu-estimating-camera-motion-from
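The final step described in the abstract, recovering instantaneous camera velocity from a predicted flow field and depth map under a small-motion assumption, is a linear least-squares problem. The sketch below sets it up with the standard instantaneous motion-field equations for a pinhole camera; the intrinsics and the random "predictions" are placeholders, and the paper's exact parameterization may differ.

```python
import numpy as np

def solve_camera_velocity(flow, depth, fx, fy, cx, cy):
    """Least-squares instantaneous camera velocity (t, omega) from dense flow + depth.

    flow:  (H, W, 2) flow in pixels/frame, treated as the instantaneous motion field
    depth: (H, W) metric depth per pixel
    Returns (t, omega), each a length-3 vector, under the small-motion model.
    """
    H, W = depth.shape
    v, u = np.mgrid[0:H, 0:W]
    x = (u - cx) / fx                    # normalized image coordinates
    y = (v - cy) / fy
    Z = depth
    ux = flow[..., 0] / fx               # flow converted to normalized-coordinate rates
    uy = flow[..., 1] / fy

    zeros = np.zeros_like(x)
    # rows of the instantaneous motion-field equations (Longuet-Higgins/Prazdny form)
    A_u = np.stack([-1 / Z, zeros, x / Z, x * y, -(1 + x**2), y], axis=-1)
    A_v = np.stack([zeros, -1 / Z, y / Z, 1 + y**2, -x * y, -x], axis=-1)
    A = np.concatenate([A_u.reshape(-1, 6), A_v.reshape(-1, 6)], axis=0)
    b = np.concatenate([ux.ravel(), uy.ravel()])

    params, *_ = np.linalg.lstsq(A, b, rcond=None)
    return params[:3], params[3:]        # translational, angular velocity

# Toy usage with synthetic inputs standing in for the network's predictions.
flow = np.random.randn(48, 64, 2) * 0.5
depth = np.random.rand(48, 64) + 1.0
t, omega = solve_camera_velocity(flow, depth, fx=60.0, fy=60.0, cx=32.0, cy=24.0)
```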
From Monocular Vision to Autonomous Action: Guiding Tumor Resection via 3D Reconstruction
Mar 20, 2025
Authors: Ayberk Acar, Mariana Smith, Lidia Al-Zogbi, Tanner Watts, Fangjie Li, Hao Li, Nural Yilmaz, Paul Maria Scheikl, Jesse F. d'Almeida, Susheela Sharma (+7 more)
Abstract: Surgical automation requires precise guidance and understanding of the scene. Current methods in the literature rely on bulky depth cameras to create maps of the anatomy; however, this does not translate well to space-limited clinical applications. Monocular cameras are small and allow minimally invasive surgeries in tight spaces, but additional processing is required to generate 3D scene understanding. We propose a 3D mapping pipeline that uses only RGB images to create segmented point clouds of the target anatomy. To ensure the most precise reconstruction, we compare the performance of different structure-from-motion algorithms on mapping central airway obstructions, and we test the pipeline on a downstream task of tumor resection. In several metrics, including post-procedure tissue model evaluation, our pipeline performs comparably to RGB-D cameras and, in some cases, even surpasses their performance. These promising results demonstrate that automation guidance can be achieved in minimally invasive procedures with monocular cameras. This study is a step toward the complete autonomy of surgical robots.
* 7 Pages, 8 Figures, 1 Table. This work has been submitted to the IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) for possible publication
Via arXiv
Access Paper or Ask Questions: /paper/from-monocular-vision-to-autonomous-action

Fast alignment of heterogeneous images in sliced Wasserstein distance
style="color:transparent" srcSet="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fbookmark_outline.3a3e1c2c.png&w=640&q=75 1x, /_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fbookmark_outline.3a3e1c2c.png&w=828&q=75 2x" src="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fbookmark_outline.3a3e1c2c.png&w=828&q=75"/></button></div><div class="wrapper Search_buttons-spacing__iB2NS"><button class="AlertButton_alert-btn__pC8cK" title="Get alerts when new code is available for this paper"><img alt="Alert button" id="alert_btn" loading="lazy" width="512" height="512" decoding="async" data-nimg="1" class="alert-btn-image " style="color:transparent" srcSet="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Falert_light_mode_icon.b8fca154.png&w=640&q=75 1x, /_next/image?url=%2F_next%2Fstatic%2Fmedia%2Falert_light_mode_icon.b8fca154.png&w=1080&q=75 2x" src="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Falert_light_mode_icon.b8fca154.png&w=1080&q=75"/></button><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 106 34" style="margin-left:9px"><g class="sparkles"><path style="animation:sparkle 2s 0s infinite ease-in-out" d="M15.5740361 -10.33344622s1.1875777-6.20179466 2.24320232 0c0 0 5.9378885 1.05562462 0 2.11124925 0 0-1.05562463 6.33374774-2.24320233 0-3.5627331-.6597654-3.29882695-1.31953078 0-2.11124925z"></path><path style="animation:sparkle 1.5s 0.9s infinite ease-in-out" d="M33.5173993 75.97263826s1.03464615-5.40315215 1.95433162 0c0 0 5.17323078.91968547 0 1.83937095 0 0-.91968547 5.51811283-1.95433162 0-3.10393847-.57480342-2.8740171-1.14960684 0-1.83937095z"></path><path style="animation:sparkle 1.7s 0.4s infinite ease-in-out" d="M69.03038108 1.71240809s.73779281-3.852918 1.39360864 0c0 0 3.68896404.65581583 0 1.31163166 0 0-.65581583 3.93489497-1.39360864 0-2.21337842-.4098849-2.04942447-.81976979 0-1.31163166z"></path></g></svg></div></div><span class="Search_publication-date__mLvO2">Mar 17, 2025<br/></span><div class="AuthorLinks_authors-container__fAwXT"><span class="descriptor" style="display:none">Authors:</span><span><a data-testid="paper-result-author" href="/author/Yunpeng%20Shi">Yunpeng Shi</a>, </span><span><a data-testid="paper-result-author" href="/author/Amit%20Singer">Amit Singer</a>, </span><span><a data-testid="paper-result-author" href="/author/Eric%20J.%20Verbeke">Eric J. Verbeke</a></span></div><div class="Search_paper-detail-page-images-container__FPeuN"></div><p class="Search_paper-content__1CSu5 text-with-links"><span class="descriptor" style="display:none">Abstract:</span>Many applications of computer vision rely on the alignment of similar but non-identical images. We present a fast algorithm for aligning heterogeneous images based on optimal transport. Our approach combines the speed of fast Fourier methods with the robustness of sliced probability metrics and allows us to efficiently compute the alignment between two $L \times L$ images using the sliced 2-Wasserstein distance in $O(L^2 \log L)$ operations. 
We show that our method is robust to translations, rotations and deformations in the images.<br/></p><div class="text-with-links"><span></span><span></span></div><div class="Search_search-result-provider__uWcak">Via<img alt="arxiv icon" loading="lazy" width="56" height="25" decoding="async" data-nimg="1" class="Search_arxiv-icon__SXHe4" style="color:transparent" srcSet="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Farxiv.41e50dc5.png&w=64&q=75 1x, /_next/image?url=%2F_next%2Fstatic%2Fmedia%2Farxiv.41e50dc5.png&w=128&q=75 2x" src="/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Farxiv.41e50dc5.png&w=128&q=75"/></div><div class="Search_paper-link__nVhf_"><svg role="img" height="20" width="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" style="margin-right:5px"><title>Github Icon</title><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path></svg><svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" data-slot="icon" width="22" style="margin-right:10px;margin-top:2px"><path stroke-linecap="round" stroke-linejoin="round" d="M12 6.042A8.967 8.967 0 0 0 6 3.75c-1.052 0-2.062.18-3 .512v14.25A8.987 8.987 0 0 1 6 18c2.305 0 4.408.867 6 2.292m0-14.25a8.966 8.966 0 0 1 6-2.292c1.052 0 2.062.18 3 .512v14.25A8.987 8.987 0 0 0 18 18a8.967 8.967 0 0 0-6 2.292m0-14.25v14.25"></path></svg><a data-testid="paper-result-access-link" href="/paper/fast-alignment-of-heterogeneous-images-in">Access Paper or Ask Questions</a></div></section><div class="Search_seperator-line__4FidS"></div></div></section><section data-hydration-on-demand="true"></section></div><section data-hydration-on-demand="true"></section></div></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"results":[{"title":"LaPIG: Cross-Modal Generation of Paired Thermal and Visible Facial Images","content":"The success of modern machine learning, particularly in facial translation networks, is highly dependent on the availability of high-quality, paired, large-scale datasets. However, acquiring sufficient data is often challenging and costly. Inspired by the recent success of diffusion models in high-quality image synthesis and advancements in Large Language Models (LLMs), we propose a novel framework called LLM-assisted Paired Image Generation (LaPIG). This framework enables the construction of comprehensive, high-quality paired visible and thermal images using captions generated by LLMs. Our method encompasses three parts: visible image synthesis with ArcFace embedding, thermal image translation using Latent Diffusion Models (LDMs), and caption generation with LLMs. Our approach not only generates multi-view paired visible and thermal images to increase data diversity but also produces high-quality paired data while maintaining their identity information. 
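For readers unfamiliar with the metric named above, the sketch below illustrates a sliced 2-Wasserstein distance between two images treated as normalized 2D mass distributions: project the pixel mass onto random directions and average the closed-form 1D Wasserstein distances between the projections. This is only an illustration of the quantity, not the paper's $O(L^2 \log L)$ FFT-based alignment algorithm; the test images, quantile grid, and number of projections are arbitrary placeholder choices.

```python
# Hedged sketch: sliced 2-Wasserstein distance between two grayscale images
# viewed as normalized 2D mass distributions. Each random direction gives a 1D
# projection whose W2 distance has a closed form via quantile functions.
import numpy as np

def weighted_quantiles(values, weights, qs):
    # Quantile function of a weighted 1D sample, evaluated at levels qs.
    order = np.argsort(values)
    v, w = values[order], weights[order]
    cdf = np.cumsum(w)
    return np.interp(qs, cdf, v)

def sliced_wasserstein2(img_a, img_b, n_proj=64, seed=None):
    rng = np.random.default_rng(seed)

    def as_cloud(img):
        # Pixel coordinates as points, normalized intensities as weights.
        h, w = img.shape
        ys, xs = np.mgrid[0:h, 0:w]
        wts = img.ravel().astype(float)
        keep = wts > 0
        pts = np.stack([xs.ravel(), ys.ravel()], axis=1).astype(float)[keep]
        return pts, wts[keep] / wts[keep].sum()

    pts_a, w_a = as_cloud(img_a)
    pts_b, w_b = as_cloud(img_b)

    qs = np.linspace(0.0, 1.0, 200)        # quantile grid for the 1D metric
    total = 0.0
    for _ in range(n_proj):
        theta = rng.uniform(0.0, np.pi)
        direction = np.array([np.cos(theta), np.sin(theta)])
        proj_a, proj_b = pts_a @ direction, pts_b @ direction
        # 1D W2 = L2 distance between the two quantile functions.
        qa = weighted_quantiles(proj_a, w_a, qs)
        qb = weighted_quantiles(proj_b, w_b, qs)
        total += np.mean((qa - qb) ** 2)
    return np.sqrt(total / n_proj)

# Placeholder images: a bright square and a shifted copy.
a = np.zeros((64, 64)); a[20:30, 20:30] = 1.0
b = np.zeros((64, 64)); b[24:34, 26:36] = 1.0
print(sliced_wasserstein2(a, b, n_proj=32, seed=0))
```

An alignment search in the spirit of the abstract would minimize such a distance over translations and rotations of one image; the paper's contribution is evaluating that search quickly with fast Fourier methods rather than by brute force.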