CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 268 results for author: <span class="mathjax">Xie, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Xie%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Xie, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xie%2C+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xie, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xie%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11147">arXiv:2502.11147</a> <span> [<a 
href="https://arxiv.org/pdf/2502.11147">pdf</a>, <a href="https://arxiv.org/format/2502.11147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Efficient Long-Decoding Inference with Reasoning-Aware Attention Sparsity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+J">Junhao Hu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wenrui Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weidong Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenwen Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+T">Tiancheng Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhixia Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xusheng Chen</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Y">Yizhou Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11147v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated strong capabilities across various domains, with recent advancements in challenging reasoning tasks such as mathematics and programming. However, solving reasoning tasks often requires long decoding chains (of thoughts), which incur $O(N)$ time and memory consumption, where $N$ is the chain length. 
To mitigate $O(N)$ time and memory consumption, exist… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11147v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11147v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11147v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated strong capabilities across various domains, with recent advancements in challenging reasoning tasks such as mathematics and programming. However, solving reasoning tasks often requires long decoding chains (of thoughts), which incur $O(N)$ time and memory consumption, where $N$ is the chain length. To mitigate $O(N)$ time and memory consumption, existing sparsity-based algorithms propose retaining only the most critical token's intermediate data (i.e., key-value cache) and discarding the rest. However, these existing algorithms struggle with the ``impossible trinity'' of accuracy, time, and memory. For example, the state-of-the-art algorithm, Quest, achieves high accuracy with $O(L)$ time but $O(N)$ memory ($L$ is the cache budget, $L \ll N$). To address this issue, in this paper, we identify a new attention pattern during the decode stage of reasoning tasks, where milestone tokens (analogous to lemmas in mathematical proofs) emerge, are utilized, and then become unimportant afterward. Based on this pattern, we propose a new algorithm named RaaS that identifies and retains milestone tokens only until they are no longer needed, achieving high accuracy with $O(L)$ time and $O(L)$ memory complexity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11147v1-abstract-full').style.display = 'none'; document.getElementById('2502.11147v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10581">arXiv:2502.10581</a> <span> [<a href="https://arxiv.org/pdf/2502.10581">pdf</a>, <a href="https://arxiv.org/ps/2502.10581">ps</a>, <a href="https://arxiv.org/format/2502.10581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Do We Need to Verify Step by Step? 
Rethinking Process Supervision from a Theoretical Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+Z">Zeyu Jia</a>, <a href="/search/cs?searchtype=author&query=Rakhlin%2C+A">Alexander Rakhlin</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tengyang Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10581v1-abstract-short" style="display: inline;"> As large language models have evolved, it has become crucial to distinguish between process supervision and outcome supervision -- two key reinforcement learning approaches to complex reasoning tasks. While process supervision offers intuitive advantages for long-term credit assignment, the precise relationship between these paradigms has remained an open question. Conventional wisdom suggests tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10581v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10581v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10581v1-abstract-full" style="display: none;"> As large language models have evolved, it has become crucial to distinguish between process supervision and outcome supervision -- two key reinforcement learning approaches to complex reasoning tasks. While process supervision offers intuitive advantages for long-term credit assignment, the precise relationship between these paradigms has remained an open question. Conventional wisdom suggests that outcome supervision is fundamentally more challenging due to the trajectory-level coverage problem, leading to significant investment in collecting fine-grained process supervision data. In this paper, we take steps towards resolving this debate. 
Our main theorem shows that, under standard data coverage assumptions, reinforcement learning through outcome supervision is no more statistically difficult than through process supervision, up to polynomial factors in horizon. At the core of this result lies the novel Change of Trajectory Measure Lemma -- a technical tool that bridges return-based trajectory measure and step-level distribution shift. Furthermore, for settings with access to a verifier or a rollout capability, we prove that any policy's advantage function can serve as an optimal process reward model, providing a direct connection between outcome and process supervision. These findings suggest that the empirically observed performance gap -- if any -- between outcome and process supervision likely stems from algorithmic limitations rather than inherent statistical difficulties, potentially transforming how we approach data collection and algorithm design for reinforcement learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10581v1-abstract-full').style.display = 'none'; document.getElementById('2502.10581v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07527">arXiv:2502.07527</a> <span> [<a href="https://arxiv.org/pdf/2502.07527">pdf</a>, <a href="https://arxiv.org/format/2502.07527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NatureLM: Deciphering the Language of Nature for Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+S">Shufang Xie</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Liang He</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+C">Chuan Cao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zequn Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zekun Guo</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yeqi Bai</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&query=Min%2C+Y">Yaosen Min</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Ziheng Lu</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+H">Hongxia Hao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Han Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jielan Li</a>, <a 
href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jia Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jianwei Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kehan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K">Kaiyuan Gao</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+Q">Qizhi Pei</a> , et al. (20 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07527v1-abstract-short" style="display: inline;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07527v1-abstract-full" style="display: none;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typically trained in isolation, lacking the ability to integrate across different scientific domains. 
Recognizing that entities within these domains can all be represented as sequences, which together form the "language of nature", we introduce Nature Language Model (briefly, NatureLM), a sequence-based science foundation model designed for scientific discovery. Pre-trained with data from multiple scientific domains, NatureLM offers a unified, versatile model that enables various applications including: (i) generating and optimizing small molecules, proteins, RNA, and materials using text instructions; (ii) cross-domain generation/design, such as protein-to-molecule and protein-to-RNA generation; and (iii) achieving state-of-the-art performance in tasks like SMILES-to-IUPAC translation and retrosynthesis on USPTO-50k. NatureLM offers a promising generalist approach for various scientific tasks, including drug discovery (hit generation/optimization, ADMET optimization, synthesis), novel material design, and the development of therapeutic proteins or nucleotides. We have developed NatureLM models in different sizes (1 billion, 8 billion, and 46.7 billion parameters) and observed a clear improvement in performance as the model size increases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'none'; document.getElementById('2502.07527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">81 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04730">arXiv:2502.04730</a> <span> [<a href="https://arxiv.org/pdf/2502.04730">pdf</a>, <a href="https://arxiv.org/format/2502.04730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> PhyloVAE: Unsupervised Learning of Phylogenetic Trees via Variational Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyu Xie</a>, <a href="/search/cs?searchtype=author&query=Richman%2C+H">Harry Richman</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jiansi Gao</a>, <a href="/search/cs?searchtype=author&query=Matsen%2C+F+A">Frederick A. Matsen IV</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Cheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04730v1-abstract-short" style="display: inline;"> Learning informative representations of phylogenetic tree structures is essential for analyzing evolutionary relationships. Classical distance-based methods have been widely used to project phylogenetic trees into Euclidean space, but they are often sensitive to the choice of distance metric and may lack sufficient resolution. 
In this paper, we introduce phylogenetic variational autoencoders (Phyl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04730v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04730v1-abstract-full" style="display: none;"> Learning informative representations of phylogenetic tree structures is essential for analyzing evolutionary relationships. Classical distance-based methods have been widely used to project phylogenetic trees into Euclidean space, but they are often sensitive to the choice of distance metric and may lack sufficient resolution. In this paper, we introduce phylogenetic variational autoencoders (PhyloVAEs), an unsupervised learning framework designed for representation learning and generative modeling of tree topologies. Leveraging an efficient encoding mechanism inspired by autoregressive tree topology generation, we develop a deep latent-variable generative model that facilitates fast, parallelized topology generation. PhyloVAE combines this generative model with a collaborative inference model based on learnable topological features, allowing for high-resolution representations of phylogenetic tree samples. Extensive experiments demonstrate PhyloVAE's robust representation learning capabilities and fast generation of phylogenetic tree topologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04730v1-abstract-full').style.display = 'none'; document.getElementById('2502.04730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025. 22 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04491">arXiv:2502.04491</a> <span> [<a href="https://arxiv.org/pdf/2502.04491">pdf</a>, <a href="https://arxiv.org/ps/2502.04491">ps</a>, <a href="https://arxiv.org/format/2502.04491">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Provable Sample-Efficient Transfer Learning Conditional Diffusion Models via Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+Z">Ziheng Cheng</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyu Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shiyue Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Cheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04491v1-abstract-short" style="display: inline;"> While conditional diffusion models have achieved remarkable success in various applications, they require abundant data to train from scratch, which is often infeasible in practice. To address this issue, transfer learning has emerged as an essential paradigm in small data regimes. 
Despite its empirical success, the theoretical underpinnings of transfer learning conditional diffusion models remain… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04491v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04491v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04491v1-abstract-full" style="display: none;"> While conditional diffusion models have achieved remarkable success in various applications, they require abundant data to train from scratch, which is often infeasible in practice. To address this issue, transfer learning has emerged as an essential paradigm in small data regimes. Despite its empirical success, the theoretical underpinnings of transfer learning conditional diffusion models remain unexplored. In this paper, we take the first step towards understanding the sample efficiency of transfer learning conditional diffusion models through the lens of representation learning. Inspired by practical training procedures, we assume that there exists a low-dimensional representation of conditions shared across all tasks. Our analysis shows that with a well-learned representation from source tasks, the samplecomplexity of target tasks can be reduced substantially. In addition, we investigate the practical implications of our theoretical results in several real-world applications of conditional diffusion models. Numerical experiments are also conducted to verify our results. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04491v1-abstract-full').style.display = 'none'; document.getElementById('2502.04491v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03449">arXiv:2502.03449</a> <span> [<a href="https://arxiv.org/pdf/2502.03449">pdf</a>, <a href="https://arxiv.org/format/2502.03449">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Dress-1-to-3: Single Image to Simulation-Ready 3D Outfit with Diffusion Prior and Differentiable Physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chang Yu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+W">Wenxin Du</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Ying Jiang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyi Xie</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunuo Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yin Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03449v1-abstract-short" style="display: inline;"> Recent advances in large models have significantly advanced image-to-3D 
reconstruction. However, the generated models are often fused into a single piece, limiting their applicability in downstream tasks. This paper focuses on 3D garment generation, a key area for applications like virtual try-on with dynamic garment animations, which require garments to be separable and simulation-ready. We intro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03449v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03449v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03449v1-abstract-full" style="display: none;"> Recent advances in large models have significantly advanced image-to-3D reconstruction. However, the generated models are often fused into a single piece, limiting their applicability in downstream tasks. This paper focuses on 3D garment generation, a key area for applications like virtual try-on with dynamic garment animations, which require garments to be separable and simulation-ready. We introduce Dress-1-to-3, a novel pipeline that reconstructs physics-plausible, simulation-ready separated garments with sewing patterns and humans from an in-the-wild image. Starting with the image, our approach combines a pre-trained image-to-sewing pattern generation model for creating coarse sewing patterns with a pre-trained multi-view diffusion model to produce multi-view images. The sewing pattern is further refined using a differentiable garment simulator based on the generated multi-view images. Versatile experiments demonstrate that our optimization approach substantially enhances the geometric alignment of the reconstructed 3D garments and humans with the input image. Furthermore, by integrating a texture generation module and a human motion generation module, we produce customized physics-plausible and realistic dynamic garment demonstrations. 
Project page: https://dress-1-to-3.github.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03449v1-abstract-full').style.display = 'none'; document.getElementById('2502.03449v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://dress-1-to-3.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02567">arXiv:2502.02567</a> <span> [<a href="https://arxiv.org/pdf/2502.02567">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Fairness in Survival Analysis: A Novel Conditional Mutual Information Augmentation Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyang Xie</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yong Ge</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02567v1-abstract-short" style="display: inline;"> Survival analysis, a vital tool for predicting the time to event, has been used in many domains such as healthcare, criminal justice, and finance. 
Like classification tasks, survival analysis can exhibit bias against disadvantaged groups, often due to biases inherent in data or algorithms. Several studies in both the IS and CS communities have attempted to address fairness in survival analysis. Ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02567v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02567v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02567v1-abstract-full" style="display: none;"> Survival analysis, a vital tool for predicting the time to event, has been used in many domains such as healthcare, criminal justice, and finance. Like classification tasks, survival analysis can exhibit bias against disadvantaged groups, often due to biases inherent in data or algorithms. Several studies in both the IS and CS communities have attempted to address fairness in survival analysis. However, existing methods often overlook the importance of prediction fairness at pre-defined evaluation time points, which is crucial in real-world applications where decision making often hinges on specific time frames. To address this critical research gap, we introduce a new fairness concept: equalized odds (EO) in survival analysis, which emphasizes prediction fairness at pre-defined time points. To achieve the EO fairness in survival analysis, we propose a Conditional Mutual Information Augmentation (CMIA) approach, which features a novel fairness regularization term based on conditional mutual information and an innovative censored data augmentation technique. Our CMIA approach can effectively balance prediction accuracy and fairness, and it is applicable to various survival models. 
We evaluate the CMIA approach against several state-of-the-art methods within three different application domains, and the results demonstrate that CMIA consistently reduces prediction disparity while maintaining good accuracy and significantly outperforms the other competing methods across multiple datasets and survival models (e.g., linear COX, deep AFT). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02567v1-abstract-full').style.display = 'none'; document.getElementById('2502.02567v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19155">arXiv:2501.19155</a> <span> [<a href="https://arxiv.org/pdf/2501.19155">pdf</a>, <a href="https://arxiv.org/format/2501.19155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SWAT: Sliding Window Adversarial Training for Gradual Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zixi Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yubo Huang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+W">Wenwei Luo</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tonglan Xie</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+M">Mengmeng Jing</a>, <a href="/search/cs?searchtype=author&query=Zuo%2C+L">Lin Zuo</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19155v1-abstract-short" style="display: inline;"> Domain shifts are critical issues that harm the performance of machine learning. Unsupervised Domain Adaptation (UDA) mitigates this issue but suffers when the domain shifts are steep and drastic. Gradual Domain Adaptation (GDA) alleviates this problem in a mild way by gradually adapting from the source to the target domain using multiple intermediate domains. In this paper, we propose Sliding Win… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19155v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19155v1-abstract-full" style="display: none;"> Domain shifts are critical issues that harm the performance of machine learning. Unsupervised Domain Adaptation (UDA) mitigates this issue but suffers when the domain shifts are steep and drastic. Gradual Domain Adaptation (GDA) alleviates this problem in a mild way by gradually adapting from the source to the target domain using multiple intermediate domains. In this paper, we propose Sliding Window Adversarial Training (SWAT) for Gradual Domain Adaptation. SWAT uses the construction of adversarial streams to connect the feature spaces of the source and target domains. In order to gradually narrow the small gap between adjacent intermediate domains, a sliding window paradigm is designed that moves along the adversarial stream. When the window moves to the end of the stream, i.e., the target domain, the domain shift is drastically reduced. Extensive experiments are conducted on public GDA benchmarks, and the results demonstrate that the proposed SWAT significantly outperforms the state-of-the-art approaches. 
The implementation is available at: https://anonymous.4open.science/r/SWAT-8677. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19155v1-abstract-full').style.display = 'none'; document.getElementById('2501.19155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to icml 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16550">arXiv:2501.16550</a> <span> [<a href="https://arxiv.org/pdf/2501.16550">pdf</a>, <a href="https://arxiv.org/format/2501.16550">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PhysAnimator: Physics-Guided Generative Cartoon Animation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyi Xie</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yiwei Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Ying Jiang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16550v1-abstract-short" style="display: inline;"> Creating hand-drawn animation sequences is 
labor-intensive and demands professional expertise. We introduce PhysAnimator, a novel approach for generating physically plausible meanwhile anime-stylized animation from static anime illustrations. Our method seamlessly integrates physics-based simulations with data-driven generative models to produce dynamic and visually compelling animations. To captu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16550v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16550v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16550v1-abstract-full" style="display: none;"> Creating hand-drawn animation sequences is labor-intensive and demands professional expertise. We introduce PhysAnimator, a novel approach for generating physically plausible meanwhile anime-stylized animation from static anime illustrations. Our method seamlessly integrates physics-based simulations with data-driven generative models to produce dynamic and visually compelling animations. To capture the fluidity and exaggeration characteristic of anime, we perform image-space deformable body simulations on extracted mesh geometries. We enhance artistic control by introducing customizable energy strokes and incorporating rigging point support, enabling the creation of tailored animation effects such as wind interactions. Finally, we extract and warp sketches from the simulation sequence, generating a texture-agnostic representation, and employ a sketch-guided video diffusion model to synthesize high-quality animation frames. The resulting animations exhibit temporal consistency and visual plausibility, demonstrating the effectiveness of our method in creating dynamic anime-style animations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16550v1-abstract-full').style.display = 'none'; document.getElementById('2501.16550v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16355">arXiv:2501.16355</a> <span> [<a href="https://arxiv.org/pdf/2501.16355">pdf</a>, <a href="https://arxiv.org/format/2501.16355">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> How Strategic Agents Respond: Comparing Analytical Models with LLM-Generated Responses in Strategic Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Rauch%2C+P">Pavan Rauch</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueru Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16355v1-abstract-short" style="display: inline;"> When machine learning (ML) algorithms are used to automate human-related decisions, human agents may gain knowledge of the decision policy and behave strategically to obtain desirable outcomes. Strategic Classification (SC) has been proposed to address the interplay between agents and decision-makers. 
Prior work on SC has relied on assumptions that agents are perfectly or approximately rational, r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16355v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16355v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16355v1-abstract-full" style="display: none;"> When machine learning (ML) algorithms are used to automate human-related decisions, human agents may gain knowledge of the decision policy and behave strategically to obtain desirable outcomes. Strategic Classification (SC) has been proposed to address the interplay between agents and decision-makers. Prior work on SC has relied on assumptions that agents are perfectly or approximately rational, responding to decision policies by maximizing their utilities. Verifying these assumptions is challenging due to the difficulty of collecting real-world agent responses. Meanwhile, the growing adoption of large language models (LLMs) makes it increasingly likely that human agents in SC settings will seek advice from these tools. We propose using strategic advice generated by LLMs to simulate human agent responses in SC. Specifically, we examine five critical SC scenarios -- hiring, loan applications, school admissions, personal income, and public assistance programs -- and simulate how human agents with diverse profiles seek advice from LLMs. We then compare the resulting agent responses with the best responses generated by existing theoretical models. 
Our findings reveal that: (i) LLMs and theoretical models generally lead to agent score or qualification changes in the same direction across most settings, with both achieving similar levels of fairness; (ii) state-of-the-art commercial LLMs (e.g., GPT-3.5, GPT-4) consistently provide helpful suggestions, though these suggestions typically do not result in maximal score or qualification improvements; and (iii) LLMs tend to produce more diverse agent responses, often favoring more balanced effort allocation strategies. These results suggest that theoretical models align with LLMs to some extent and that leveraging LLMs to simulate more realistic agent responses offers a promising approach to designing trustworthy ML systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16355v1-abstract-full').style.display = 'none'; document.getElementById('2501.16355v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14417">arXiv:2501.14417</a> <span> [<a href="https://arxiv.org/pdf/2501.14417">pdf</a>, <a href="https://arxiv.org/format/2501.14417">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> DeepFlow: Serverless Large Language Model Serving at Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+J">Junhao Hu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiang Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhixia Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yulong He</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuetao Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Baoquan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+S">Shining Wan</a>, <a href="/search/cs?searchtype=author&query=Dan%2C+G">Gengyuan Dan</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhiyu Dong</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Z">Zhihao Ren</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+J">Jie Meng</a>, <a href="/search/cs?searchtype=author&query=He%2C+C">Chao He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Changhong Liu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+D">Dayun Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qin Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yue Yu</a>, <a 
href="/search/cs?searchtype=author&query=Feng%2C+H">Hao Feng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xusheng Chen</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Y">Yizhou Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14417v2-abstract-short" style="display: inline;"> This paper introduces DeepFlow, a scalable and serverless AI platform designed to efficiently serve large language models (LLMs) at scale in cloud environments. DeepFlow addresses key challenges such as resource allocation, serving efficiency, and cold start latencies through four main design components. First, it uses a simple serverless abstraction called the request-job-task model, which helps… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14417v2-abstract-full').style.display = 'inline'; document.getElementById('2501.14417v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14417v2-abstract-full" style="display: none;"> This paper introduces DeepFlow, a scalable and serverless AI platform designed to efficiently serve large language models (LLMs) at scale in cloud environments. DeepFlow addresses key challenges such as resource allocation, serving efficiency, and cold start latencies through four main design components. First, it uses a simple serverless abstraction called the request-job-task model, which helps manage AI workloads across post-training and model serving tasks. Second, it builds an in-house serving engine FlowServe using a microkernel-inspired design, NPU-centric execution, and SPMD-based parallelism to optimize LLM serving. The system also includes novel scheduling policies tailored for both PD-disaggregated and PD-colocated configurations. 
With optimizations like pre-warmed pods, DRAM pre-loading, and NPU-fork, DeepFlow can scale up to 64 instances in seconds. DeepFlow has been in production for over a year, operating on a large Ascend NPU cluster and providing industry-standard APIs for fine-tuning, agent serving, and model serving to our customers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14417v2-abstract-full').style.display = 'none'; document.getElementById('2501.14417v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11052">arXiv:2501.11052</a> <span> [<a href="https://arxiv.org/pdf/2501.11052">pdf</a>, <a href="https://arxiv.org/format/2501.11052">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> SLVC-DIDA: Signature-less Verifiable Credential-based Issuer-hiding and Multi-party Authentication for Decentralized Identity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianxiu Xie</a>, <a href="/search/cs?searchtype=author&query=Gai%2C+K">Keke Gai</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jing Yu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chennan Guo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Liehuang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11052v1-abstract-short" style="display: inline;"> As an emerging paradigm in digital identity, Decentralized Identity (DID) appears advantages over traditional identity management methods in a variety of aspects, e.g., enhancing user-centric online services and ensuring complete user autonomy and control. Verifiable Credential (VC) techniques are used to facilitate decentralized DID-based access control across multiple entities. However, existing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11052v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11052v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11052v1-abstract-full" style="display: none;"> As an emerging paradigm in digital identity, Decentralized Identity (DID) appears advantages over traditional identity management methods in a variety of aspects, e.g., enhancing user-centric online services and ensuring complete user autonomy and control. Verifiable Credential (VC) techniques are used to facilitate decentralized DID-based access control across multiple entities. However, existing DID schemes generally rely on a distributed public key infrastructure that also causes challenges, such as context information deduction, key exposure, and issuer data leakage. To address the issues above, this paper proposes a Permanent Issuer-Hiding (PIH)-based DID multi-party authentication framework with a signature-less VC model, named SLVC-DIDA, for the first time. Our proposed scheme avoids the dependence on signing keys by employing hashing and issuer membership proofs, which supports universal zero-knowledge multi-party DID authentications, eliminating additional technical integrations. 
We adopt a zero-knowledge RSA accumulator to maintain the anonymity of the issuer set, thereby enabling public verification while safeguarding the privacy of identity attributes via a Merkle tree-based VC list. By eliminating reliance on a Public Key Infrastructure (PKI), SLVC-DIDA enables fully decentralized issuance and verification of DIDs. Furthermore, our scheme ensures PIH through the implementation of the zero-knowledge Issuer set and VC list, so that the risks of key leakage and contextual inference attacks are effectively mitigated. Our experiments further evaluate the effectiveness and practicality of SLVC-DIDA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11052v1-abstract-full').style.display = 'none'; document.getElementById('2501.11052v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07814">arXiv:2501.07814</a> <span> [<a href="https://arxiv.org/pdf/2501.07814">pdf</a>, <a href="https://arxiv.org/format/2501.07814">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> STTS-EAD: Improving Spatio-Temporal Learning Based Time Series Prediction via Embedded Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuanyuan Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianhao Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tingyu Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07814v1-abstract-short" style="display: inline;"> Handling anomalies is a critical preprocessing step in multivariate time series prediction. However, existing approaches that separate anomaly preprocessing from model training for multivariate time series prediction encounter significant limitations. 
Specifically, these methods fail to utilize auxiliary information crucial for identifying latent anomalies associated with spatiotemporal factors du… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07814v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07814v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07814v1-abstract-full" style="display: none;"> Handling anomalies is a critical preprocessing step in multivariate time series prediction. However, existing approaches that separate anomaly preprocessing from model training for multivariate time series prediction encounter significant limitations. Specifically, these methods fail to utilize auxiliary information crucial for identifying latent anomalies associated with spatiotemporal factors during the preprocessing stage. Instead, they rely solely on data distribution for anomaly detection, which can result in the incorrect processing of numerous samples that could otherwise contribute positively to model training. To address this, we propose STTS-EAD, an end-to-end method that seamlessly integrates anomaly detection into the training process of multivariate time series forecasting and aims to improve Spatio-Temporal learning based Time Series prediction via Embedded Anomaly Detection. Our proposed STTS-EAD leverages spatio-temporal information for forecasting and anomaly detection, with the two parts alternately executed and optimized for each other. To the best of our knowledge, STTS-EAD is the first to integrate anomaly detection and forecasting tasks in the training phase for improving the accuracy of multivariate time series forecasting. 
Extensive experiments on a public stock dataset and two real-world sales datasets from a renowned coffee chain enterprise show that our proposed method can effectively process detected anomalies in the training stage to improve forecasting performance in the inference stage and significantly outperform baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07814v1-abstract-full').style.display = 'none'; document.getElementById('2501.07814v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07487">arXiv:2501.07487</a> <span> [<a href="https://arxiv.org/pdf/2501.07487">pdf</a>, <a href="https://arxiv.org/ps/2501.07487">ps</a>, <a href="https://arxiv.org/format/2501.07487">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data and System Perspectives of Sustainable Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Harel%2C+D">David Harel</a>, <a href="/search/cs?searchtype=author&query=Ran%2C+D">Dezhi Ran</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhenwen Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Maoliang Li</a>, <a 
href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhi Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Leye Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ying Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wentao Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Meng Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyi Li</a>, <a href="/search/cs?searchtype=author&query=Marron%2C+A">Assaf Marron</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07487v1-abstract-short" style="display: inline;"> Sustainable AI is a subfield of AI concerned with developing and using AI systems in ways that aim to reduce environmental impact and achieve sustainability. Sustainable AI is increasingly important given that training of and inference with AI models such as large language models are consuming a large amount of computing power. In this article, we discuss current issues, opportunities and example… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07487v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07487v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07487v1-abstract-full" style="display: none;"> Sustainable AI is a subfield of AI concerned with developing and using AI systems in ways that aim to reduce environmental impact and achieve sustainability. Sustainable AI is increasingly important given that training of and inference with AI models such as large language models are consuming a large amount of computing power. 
In this article, we discuss current issues, opportunities and example solutions for addressing these issues, and future challenges to tackle, from the data and system perspectives, related to data acquisition, data processing, and AI model training and inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07487v1-abstract-full').style.display = 'none'; document.getElementById('2501.07487v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02901">arXiv:2501.02901</a> <span> [<a href="https://arxiv.org/pdf/2501.02901">pdf</a>, <a href="https://arxiv.org/format/2501.02901">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> DeCon: Detecting Incorrect Assertions via Postconditions Generated by a Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hao Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianyu Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiaming Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zongyang Li</a>, <a href="/search/cs?searchtype=author&query=Ran%2C+D">Dezhi Ran</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyu Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Ying Li</a>, <a 
href="/search/cs?searchtype=author&query=Marron%2C+A">Assaf Marron</a>, <a href="/search/cs?searchtype=author&query=Harel%2C+D">David Harel</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuan Xie</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02901v1-abstract-short" style="display: inline;"> Recently, given the docstring for the target problem and the target function signature, large language models (LLMs) have been used not only to generate source code, but also to generate test cases, consisting of test inputs and assertions (e.g., in the form of checking an actual output against the expected output). However, as shown by our empirical study on assertions generated by four LLMs for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02901v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02901v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02901v1-abstract-full" style="display: none;"> Recently, given the docstring for the target problem and the target function signature, large language models (LLMs) have been used not only to generate source code, but also to generate test cases, consisting of test inputs and assertions (e.g., in the form of checking an actual output against the expected output). However, as shown by our empirical study on assertions generated by four LLMs for the HumanEval benchmark, over 62% of the generated assertions are incorrect (i.e., failed on the ground-truth problem solution). 
To detect incorrect assertions (given the docstring and the target function signature along with a sample of example inputs and outputs), in this paper, we propose a new approach named DeCon to effectively detect incorrect assertions via LLM-generated postconditions for the target problem (a postcondition is a predicate that must always be true just after the execution of the ground-truth problem solution). Our approach requires a small set of I/O examples (i.e., a sample of example inputs and outputs) for the target problem (e.g., the I/O examples included in the docstring for a target problem in HumanEval). We use the given I/O examples to filter out those LLM-generated postconditions that are violated by at least one given I/O example. We then use the remaining postconditions to detect incorrect assertions as those assertions that violate at least one remaining postcondition. Experimental results show that DeCon can detect, on average, more than 64% (63% and 65.5% detected by GPT-3.5 and GPT-4, respectively) of incorrect assertions generated by four state-of-the-art LLMs, and DeCon can also improve the effectiveness of these LLMs in code generation by 4% in terms of Pass@1. In addition, although DeCon might filter out correct assertions, the fault-finding ability of the remaining correct assertions decreases only slightly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02901v1-abstract-full').style.display = 'none'; document.getElementById('2501.02901v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02863">arXiv:2501.02863</a> <span> [<a href="https://arxiv.org/pdf/2501.02863">pdf</a>, <a href="https://arxiv.org/format/2501.02863">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Beyond Pass or Fail: Multi-Dimensional Benchmarking of Foundation Models for Goal-based Mobile UI Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ran%2C+D">Dezhi Ran</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Mengzhou Wu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hao Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuetong Li</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jun Ren</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuan Cao</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xia Zeng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Haochuan Lu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zexin Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Mengqian Xu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+T">Ting Su</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+L">Liangchao Yao</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+T">Ting Xiong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wei Yang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Y">Yuetang Deng</a>, <a href="/search/cs?searchtype=author&query=Marron%2C+A">Assaf Marron</a>, <a href="/search/cs?searchtype=author&query=Harel%2C+D">David Harel</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02863v2-abstract-short" style="display: inline;"> Recent advances of foundation models (FMs) have made navigating mobile applications (apps) based on high-level goal instructions within reach, with significant industrial applications such as UI testing. While existing benchmarks evaluate FM-based UI navigation using the binary pass/fail metric, they have two major limitations: they cannot reflect the complex nature of mobile UI navigation where F… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02863v2-abstract-full').style.display = 'inline'; document.getElementById('2501.02863v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02863v2-abstract-full" style="display: none;"> Recent advances of foundation models (FMs) have made navigating mobile applications (apps) based on high-level goal instructions within reach, with significant industrial applications such as UI testing. While existing benchmarks evaluate FM-based UI navigation using the binary pass/fail metric, they have two major limitations: they cannot reflect the complex nature of mobile UI navigation where FMs may fail for various reasons (e.g., misunderstanding instructions and failed planning), and they lack industrial relevance due to oversimplified tasks that poorly represent real-world scenarios. To address the preceding limitations, we propose Sphinx, a comprehensive benchmark for multi-dimensional evaluation of FMs in industrial settings of UI navigation. Sphinx introduces a specialized toolkit that evaluates five essential FM capabilities, providing detailed insights into failure modes such as insufficient app knowledge or planning issues. Using both popular Google Play applications and WeChat's internal UI test cases, we evaluate 8 FMs with 20 different configurations. 
Our results show that existing FMs universally struggle with goal-based testing tasks, primarily due to insufficient UI-specific capabilities. We summarize seven lessons learned from benchmarking FMs with Sphinx, providing clear directions for improving FM-based mobile UI navigation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02863v2-abstract-full').style.display = 'none'; document.getElementById('2501.02863v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02829">arXiv:2501.02829</a> <span> [<a href="https://arxiv.org/pdf/2501.02829">pdf</a>, <a href="https://arxiv.org/format/2501.02829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> An Infrastructure Software Perspective Toward Computation Offloading between Executable Specifications and Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ran%2C+D">Dezhi Ran</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Mengzhou Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuan Cao</a>, <a href="/search/cs?searchtype=author&query=Marron%2C+A">Assaf Marron</a>, <a href="/search/cs?searchtype=author&query=Harel%2C+D">David Harel</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02829v1-abstract-short" style="display: inline;"> Foundation Models (FMs) have become essential components in modern software systems, excelling in tasks such as pattern recognition and unstructured data processing. However, their capabilities are complemented by the precision, verifiability, and deterministic nature of executable specifications, such as symbolic programs. This paper explores a new perspective on computation offloading, proposing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02829v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02829v1-abstract-full" style="display: none;"> Foundation Models (FMs) have become essential components in modern software systems, excelling in tasks such as pattern recognition and unstructured data processing. However, their capabilities are complemented by the precision, verifiability, and deterministic nature of executable specifications, such as symbolic programs. This paper explores a new perspective on computation offloading, proposing a framework that strategically distributes computational tasks between FMs and executable specifications based on their respective strengths. We discuss the potential design of an infrastructure software framework to enable this offloading, focusing on key mechanisms such as task decomposition, resource allocation, and adaptive optimization. Furthermore, we identify critical technical challenges, including semantic-gap resolution, reliability, and scalability, that must be addressed to realize this approach. 
By leveraging the complementary strengths of FMs and symbolic programs, this perspective lays the groundwork for advancing hybrid software systems that are both efficient and reliable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02829v1-abstract-full').style.display = 'none'; document.getElementById('2501.02829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15215">arXiv:2412.15215</a> <span> [<a href="https://arxiv.org/pdf/2412.15215">pdf</a>, <a href="https://arxiv.org/format/2412.15215">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EnvGS: Modeling View-Dependent Appearance with Environment Gaussian </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhen Xu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yiman Xie</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yudong Jin</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yujun Shen</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+S">Sida Peng</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+H">Hujun Bao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiaowei Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15215v1-abstract-short" style="display: inline;"> Reconstructing complex reflections in real-world scenes from 2D images is essential for achieving photorealistic novel view synthesis. Existing methods that utilize environment maps to model reflections from distant lighting often struggle with high-frequency reflection details and fail to account for near-field reflections. In this work, we introduce EnvGS, a novel approach that employs a set of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15215v1-abstract-full').style.display = 'inline'; document.getElementById('2412.15215v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15215v1-abstract-full" style="display: none;"> Reconstructing complex reflections in real-world scenes from 2D images is essential for achieving photorealistic novel view synthesis. Existing methods that utilize environment maps to model reflections from distant lighting often struggle with high-frequency reflection details and fail to account for near-field reflections. In this work, we introduce EnvGS, a novel approach that employs a set of Gaussian primitives as an explicit 3D representation for capturing reflections of environments. These environment Gaussian primitives are incorporated with base Gaussian primitives to model the appearance of the whole scene. To efficiently render these environment Gaussian primitives, we developed a ray-tracing-based renderer that leverages the GPU's RT core for fast rendering. This allows us to jointly optimize our model for high-quality reconstruction while maintaining real-time rendering speeds. 
Results from multiple real-world and synthetic datasets demonstrate that our method produces significantly more detailed reflections, achieving the best rendering quality in real-time novel view synthesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15215v1-abstract-full').style.display = 'none'; document.getElementById('2412.15215v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://zju3dv.github.io/envgs/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14521">arXiv:2412.14521</a> <span> [<a href="https://arxiv.org/pdf/2412.14521">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic User Interface Generation for Enhanced Human-Computer Interaction Using Variational Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Runsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shixiao Wang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianfang Xie</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+S">Shiyu Duan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mengmeng Chen</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14521v1-abstract-short" style="display: inline;"> This study presents a novel approach for intelligent user interaction interface generation and optimization, grounded in the variational autoencoder (VAE) model. With the rapid advancement of intelligent technologies, traditional interface design methods struggle to meet the evolving demands for diversity and personalization, often lacking flexibility in real-time adjustments to enhance the user e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14521v1-abstract-full').style.display = 'inline'; document.getElementById('2412.14521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14521v1-abstract-full" style="display: none;"> This study presents a novel approach for intelligent user interaction interface generation and optimization, grounded in the variational autoencoder (VAE) model. With the rapid advancement of intelligent technologies, traditional interface design methods struggle to meet the evolving demands for diversity and personalization, often lacking flexibility in real-time adjustments to enhance the user experience. Human-Computer Interaction (HCI) plays a critical role in addressing these challenges by focusing on creating interfaces that are functional, intuitive, and responsive to user needs. This research leverages the RICO dataset to train the VAE model, enabling the simulation and creation of user interfaces that align with user aesthetics and interaction habits. By integrating real-time user behavior data, the system dynamically refines and optimizes the interface, improving usability and underscoring the importance of HCI in achieving a seamless user experience. 
Experimental findings indicate that the VAE-based approach significantly enhances the quality and precision of interface generation compared to other methods, including autoencoders (AE), generative adversarial networks (GAN), conditional GANs (cGAN), deep belief networks (DBN), and VAE-GAN. This work contributes valuable insights into HCI, providing robust technical solutions for automated interface generation and enhanced user experience optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14521v1-abstract-full').style.display = 'none'; document.getElementById('2412.14521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13321">arXiv:2412.13321</a> <span> [<a href="https://arxiv.org/pdf/2412.13321">pdf</a>, <a href="https://arxiv.org/format/2412.13321">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LossLens: Diagnostics for Machine Learning through Loss Landscape Visual Analytics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tiankai Xie</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiaqing Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yaoqing Yang</a>, <a href="/search/cs?searchtype=author&query=Geniesse%2C+C">Caleb Geniesse</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+G">Ge Shi</a>, <a href="/search/cs?searchtype=author&query=Chaudhari%2C+A">Ajinkya Chaudhari</a>, <a 
href="/search/cs?searchtype=author&query=Cava%2C+J+K">John Kevin Cava</a>, <a href="/search/cs?searchtype=author&query=Mahoney%2C+M+W">Michael W. Mahoney</a>, <a href="/search/cs?searchtype=author&query=Perciano%2C+T">Talita Perciano</a>, <a href="/search/cs?searchtype=author&query=Weber%2C+G+H">Gunther H. Weber</a>, <a href="/search/cs?searchtype=author&query=Maciejewski%2C+R">Ross Maciejewski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13321v1-abstract-short" style="display: inline;"> Modern machine learning often relies on optimizing a neural network's parameters using a loss function to learn complex features. Beyond training, examining the loss function with respect to a network's parameters (i.e., as a loss landscape) can reveal insights into the architecture and learning process. While the local structure of the loss landscape surrounding an individual solution can be char… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13321v1-abstract-full').style.display = 'inline'; document.getElementById('2412.13321v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13321v1-abstract-full" style="display: none;"> Modern machine learning often relies on optimizing a neural network's parameters using a loss function to learn complex features. Beyond training, examining the loss function with respect to a network's parameters (i.e., as a loss landscape) can reveal insights into the architecture and learning process. 
While the local structure of the loss landscape surrounding an individual solution can be characterized using a variety of approaches, the global structure of a loss landscape, which includes potentially many local minima corresponding to different solutions, remains far more difficult to conceptualize and visualize. To address this difficulty, we introduce LossLens, a visual analytics framework that explores loss landscapes at multiple scales. LossLens integrates metrics from global and local scales into a comprehensive visual representation, enhancing model diagnostics. We demonstrate LossLens through two case studies: visualizing how residual connections influence a ResNet-20, and visualizing how physical parameters influence a physics-informed neural network (PINN) solving a simple convection problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13321v1-abstract-full').style.display = 'none'; document.getElementById('2412.13321v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11970">arXiv:2412.11970</a> <span> [<a href="https://arxiv.org/pdf/2412.11970">pdf</a>, <a href="https://arxiv.org/format/2412.11970">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DARWIN 1.5: Large Language Models as Materials Science Adapted Learners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tong Xie</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yuwei Wan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yixuan Liu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yuchen Zeng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shaozhou Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Grazian%2C+C">Clara Grazian</a>, <a href="/search/cs?searchtype=author&query=Kit%2C+C">Chunyu Kit</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongzhan Zhou</a>, <a href="/search/cs?searchtype=author&query=Hoex%2C+B">Bram Hoex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11970v2-abstract-short" style="display: inline;"> Materials discovery and design aim to find compositions and structures with desirable properties over highly complex and diverse physical spaces. Traditional solutions, such as high-throughput simulations or machine learning, often rely on complex descriptors, which hinder generalizability and transferability across different material systems. 
Moreover, these descriptors may inadequately represent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11970v2-abstract-full').style.display = 'inline'; document.getElementById('2412.11970v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11970v2-abstract-full" style="display: none;"> Materials discovery and design aim to find compositions and structures with desirable properties over highly complex and diverse physical spaces. Traditional solutions, such as high-throughput simulations or machine learning, often rely on complex descriptors, which hinder generalizability and transferability across different material systems. Moreover, these descriptors may inadequately represent macro-scale material properties, which are influenced by structural imperfections and compositional variations in real-world samples, thus limiting their practical applicability. To address these challenges, we propose DARWIN 1.5, the largest open-source large language model tailored for materials science. By leveraging natural language as input, DARWIN eliminates the need for task-specific descriptors and enables a flexible, unified approach to material property prediction and discovery. Our approach integrates 6M material domain papers and 21 experimental datasets from 49,256 materials across modalities while enabling cross-task knowledge transfer. The enhanced model achieves up to 59.1% improvement in prediction accuracy over the base LLaMA-7B architecture and outperforms SOTA machine learning approaches across 8 materials design tasks. These results establish LLMs as a promising foundation for developing versatile and scalable models in materials science. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11970v2-abstract-full').style.display = 'none'; document.getElementById('2412.11970v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10434">arXiv:2412.10434</a> <span> [<a href="https://arxiv.org/pdf/2412.10434">pdf</a>, <a href="https://arxiv.org/format/2412.10434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> NAT-NL2GQL: A Novel Multi-Agent Framework for Translating Natural Language to Graph Query Language </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuanyuan Liang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tingyu Xie</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+G">Gan Peng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zihao Huang</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+Y">Yunshi Lan</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+W">Weining Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2412.10434v1-abstract-short" style="display: inline;"> The emergence of Large Language Models (LLMs) has revolutionized many fields, not only traditional natural language processing (NLP) tasks. Recently, research on applying LLMs to the database field has been booming, and as a typical non-relational database, the use of LLMs in graph database research has naturally gained significant attention. Recent efforts have increasingly focused on leveraging… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10434v1-abstract-full').style.display = 'inline'; document.getElementById('2412.10434v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10434v1-abstract-full" style="display: none;"> The emergence of Large Language Models (LLMs) has revolutionized many fields, not only traditional natural language processing (NLP) tasks. Recently, research on applying LLMs to the database field has been booming, and as a typical non-relational database, the use of LLMs in graph database research has naturally gained significant attention. Recent efforts have increasingly focused on leveraging LLMs to translate natural language into graph query language (NL2GQL). Although some progress has been made, these methods have clear limitations, such as their reliance on streamlined processes that often overlook the potential of LLMs to autonomously plan and collaborate with other LLMs in tackling complex NL2GQL challenges. To address this gap, we propose NAT-NL2GQL, a novel multi-agent framework for translating natural language to graph query language. Specifically, our framework consists of three synergistic agents: the Preprocessor agent, the Generator agent, and the Refiner agent. The Preprocessor agent manages data processing as context, including tasks such as name entity recognition, query rewriting, path linking, and the extraction of query-related schemas. 
The Generator agent is a fine-tuned LLM trained on NL-GQL data, responsible for generating corresponding GQL statements based on queries and their related schemas. The Refiner agent is tasked with refining the GQL or context using error information obtained from the GQL execution results. Given the scarcity of high-quality open-source NL2GQL datasets based on nGQL syntax, we developed StockGQL, a dataset constructed from a financial market graph database. It is available at: https://github.com/leonyuancode/StockGQL. Experimental results on the StockGQL and SpCQL datasets reveal that our method significantly outperforms baseline approaches, highlighting its potential for advancing NL2GQL research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10434v1-abstract-full').style.display = 'none'; document.getElementById('2412.10434v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08486">arXiv:2412.08486</a> <span> [<a href="https://arxiv.org/pdf/2412.08486">pdf</a>, <a href="https://arxiv.org/format/2412.08486">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Flow Fields in Attention for Controllable Person Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zijian Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shikun Liu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xiao Han</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haozhe Liu</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+K+W">Kam Woh Ng</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Cong%2C+Y">Yuren Cong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hang Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Mengmeng Xu</a>, <a href="/search/cs?searchtype=author&query=P%C3%A9rez-R%C3%BAa%2C+J">Juan-Manuel Pérez-Rúa</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+A">Aditya Patel</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+T">Tao Xiang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+M">Miaojing Shi</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Sen He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2412.08486v2-abstract-short" style="display: inline;"> Controllable person image generation aims to generate a person image conditioned on reference images, allowing precise control over the person's appearance or pose. However, prior methods often distort fine-grained textural details from the reference image, despite achieving high overall image quality. We attribute these distortions to inadequate attention to corresponding regions in the reference… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08486v2-abstract-full').style.display = 'inline'; document.getElementById('2412.08486v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08486v2-abstract-full" style="display: none;"> Controllable person image generation aims to generate a person image conditioned on reference images, allowing precise control over the person's appearance or pose. However, prior methods often distort fine-grained textural details from the reference image, despite achieving high overall image quality. We attribute these distortions to inadequate attention to corresponding regions in the reference image. To address this, we thereby propose learning flow fields in attention (Leffa), which explicitly guides the target query to attend to the correct reference key in the attention layer during training. Specifically, it is realized via a regularization loss on top of the attention map within a diffusion-based baseline. Our extensive experiments show that Leffa achieves state-of-the-art performance in controlling appearance (virtual try-on) and pose (pose transfer), significantly reducing fine-grained detail distortion while maintaining high image quality. Additionally, we show that our loss is model-agnostic and can be used to improve the performance of other diffusion models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08486v2-abstract-full').style.display = 'none'; document.getElementById('2412.08486v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">github: https://github.com/franciszzj/Leffa, demo: https://huggingface.co/spaces/franciszzj/Leffa, model: https://huggingface.co/franciszzj/Leffa</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07097">arXiv:2412.07097</a> <span> [<a href="https://arxiv.org/pdf/2412.07097">pdf</a>, <a href="https://arxiv.org/format/2412.07097">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> On Evaluating the Durability of Safeguards for Open-Weight LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+X">Xiangyu Qi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+B">Boyi Wei</a>, <a href="/search/cs?searchtype=author&query=Carlini%2C+N">Nicholas Carlini</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yangsibo Huang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tinghao Xie</a>, <a 
href="/search/cs?searchtype=author&query=He%2C+L">Luxi He</a>, <a href="/search/cs?searchtype=author&query=Jagielski%2C+M">Matthew Jagielski</a>, <a href="/search/cs?searchtype=author&query=Nasr%2C+M">Milad Nasr</a>, <a href="/search/cs?searchtype=author&query=Mittal%2C+P">Prateek Mittal</a>, <a href="/search/cs?searchtype=author&query=Henderson%2C+P">Peter Henderson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07097v1-abstract-short" style="display: inline;"> Stakeholders -- from model developers to policymakers -- seek to minimize the dual-use risks of large language models (LLMs). An open challenge to this goal is whether technical safeguards can impede the misuse of LLMs, even when models are customizable via fine-tuning or when model weights are fully open. In response, several recent studies have proposed methods to produce durable LLM safeguards… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07097v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07097v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07097v1-abstract-full" style="display: none;"> Stakeholders -- from model developers to policymakers -- seek to minimize the dual-use risks of large language models (LLMs). An open challenge to this goal is whether technical safeguards can impede the misuse of LLMs, even when models are customizable via fine-tuning or when model weights are fully open. In response, several recent studies have proposed methods to produce durable LLM safeguards for open-weight LLMs that can withstand adversarial modifications of the model's weights via fine-tuning. This holds the promise of raising adversaries' costs even under strong threat models where adversaries can directly fine-tune model weights. 
However, in this paper, we urge for more careful characterization of the limits of these approaches. Through several case studies, we demonstrate that even evaluating these defenses is exceedingly difficult and can easily mislead audiences into thinking that safeguards are more durable than they really are. We draw lessons from the evaluation pitfalls that we identify and suggest future research carefully cabin claims to more constrained, well-defined, and rigorously examined threat models, which can provide more useful and candid assessments to stakeholders. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07097v1-abstract-full').style.display = 'none'; document.getElementById('2412.07097v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06602">arXiv:2412.06602</a> <span> [<a href="https://arxiv.org/pdf/2412.06602">pdf</a>, <a href="https://arxiv.org/format/2412.06602">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Towards Controllable Speech Synthesis in the Era of Large Language Models: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianxin Xie</a>, <a href="/search/cs?searchtype=author&query=Rong%2C+Y">Yan Rong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Pengfei Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Li Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06602v1-abstract-short" style="display: inline;"> Text-to-speech (TTS), also known as speech synthesis, is a prominent research area that aims to generate natural-sounding human speech from text. Recently, with the increasing industrial demand, TTS technologies have evolved beyond synthesizing human-like speech to enabling controllable speech generation. 
This includes fine-grained control over various attributes of synthesized speech such as emot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06602v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06602v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06602v1-abstract-full" style="display: none;"> Text-to-speech (TTS), also known as speech synthesis, is a prominent research area that aims to generate natural-sounding human speech from text. Recently, with the increasing industrial demand, TTS technologies have evolved beyond synthesizing human-like speech to enabling controllable speech generation. This includes fine-grained control over various attributes of synthesized speech such as emotion, prosody, timbre, and duration. Besides, advancements in deep learning, such as diffusion and large language models, have significantly enhanced controllable TTS over the past several years. In this paper, we conduct a comprehensive survey of controllable TTS, covering approaches ranging from basic control techniques to methods utilizing natural language prompts, aiming to provide a clear understanding of the current state of research. We examine the general controllable TTS pipeline, challenges, model architectures, and control strategies, offering a comprehensive and clear taxonomy of existing methods. Additionally, we provide a detailed summary of datasets and evaluation metrics and shed some light on the applications and future directions of controllable TTS. To the best of our knowledge, this survey paper provides the first comprehensive review of emerging controllable TTS methods, which can serve as a beneficial resource for both academic researchers and industry practitioners. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06602v1-abstract-full').style.display = 'none'; document.getElementById('2412.06602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A comprehensive survey on controllable TTS, 23 pages, 6 tables, 4 figures, 280 references</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06134">arXiv:2412.06134</a> <span> [<a href="https://arxiv.org/pdf/2412.06134">pdf</a>, <a href="https://arxiv.org/format/2412.06134">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating and Mitigating Social Bias for Large Language Models in Open-ended Settings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhao Liu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueru Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06134v2-abstract-short" style="display: inline;"> Current social bias benchmarks for Large Language Models (LLMs) primarily rely on pre-defined question formats like multiple-choice, limiting their ability to reflect the complexity and open-ended nature of real-world 
interactions. To address this gap, we extend an existing BBQ dataset introduced by incorporating fill-in-the-blank and short-answer question types, designed to evaluate biases in an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06134v2-abstract-full').style.display = 'inline'; document.getElementById('2412.06134v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06134v2-abstract-full" style="display: none;"> Current social bias benchmarks for Large Language Models (LLMs) primarily rely on pre-defined question formats like multiple-choice, limiting their ability to reflect the complexity and open-ended nature of real-world interactions. To address this gap, we extend an existing BBQ dataset introduced by incorporating fill-in-the-blank and short-answer question types, designed to evaluate biases in an open-ended setting. Our finding reveals that LLMs tend to produce responses that are more biased against certain protected attributes, like age and socio-economic status. On the other hand, these biased outputs produced by LLMs can serve as valuable contexts and chains of thought for debiasing. Our debiasing approach combined zero-shot, few-shot, and chain-of-thought could significantly reduce the level of bias to almost 0. We open-source our evaluation and debiasing code hoping to encourage further measurements and mitigation of bias and stereotype in LLMs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06134v2-abstract-full').style.display = 'none'; document.getElementById('2412.06134v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.04454">arXiv:2412.04454</a> <span> [<a href="https://arxiv.org/pdf/2412.04454">pdf</a>, <a href="https://arxiv.org/format/2412.04454">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Aguvis: Unified Pure Vision Agents for Autonomous GUI Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yiheng Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zekun Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junli Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+D">Dunjie Lu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianbao Xie</a>, <a href="/search/cs?searchtype=author&query=Saha%2C+A">Amrita Saha</a>, <a href="/search/cs?searchtype=author&query=Sahoo%2C+D">Doyen Sahoo</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+T">Tao Yu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+C">Caiming Xiong</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.04454v1-abstract-short" style="display: inline;"> Graphical User Interfaces (GUIs) are critical to human-computer interaction, yet automating GUI tasks remains challenging due to the complexity and variability of visual environments. Existing approaches often rely on textual representations of GUIs, which introduce limitations in generalization, efficiency, and scalability. In this paper, we introduce Aguvis, a unified pure vision-based framework… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04454v1-abstract-full').style.display = 'inline'; document.getElementById('2412.04454v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.04454v1-abstract-full" style="display: none;"> Graphical User Interfaces (GUIs) are critical to human-computer interaction, yet automating GUI tasks remains challenging due to the complexity and variability of visual environments. Existing approaches often rely on textual representations of GUIs, which introduce limitations in generalization, efficiency, and scalability. In this paper, we introduce Aguvis, a unified pure vision-based framework for autonomous GUI agents that operates across various platforms. Our approach leverages image-based observations, and grounding instructions in natural language to visual elements, and employs a consistent action space to ensure cross-platform generalization. To address the limitations of previous work, we integrate explicit planning and reasoning within the model, enhancing its ability to autonomously navigate and interact with complex digital environments. 
We construct a large-scale dataset of GUI agent trajectories, incorporating multimodal reasoning and grounding, and employ a two-stage training pipeline that first focuses on general GUI grounding, followed by planning and reasoning. Through comprehensive experiments, we demonstrate that Aguvis surpasses previous state-of-the-art methods in both offline and real-world online scenarios, achieving, to our knowledge, the first fully autonomous pure vision GUI agent capable of performing tasks independently without collaboration with external closed-source models. We open-sourced all datasets, models, and training recipes to facilitate future research at https://aguvis-project.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04454v1-abstract-full').style.display = 'none'; document.getElementById('2412.04454v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://aguvis-project.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02906">arXiv:2412.02906</a> <span> [<a href="https://arxiv.org/pdf/2412.02906">pdf</a>, <a href="https://arxiv.org/format/2412.02906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Does Few-Shot Learning Help LLM Performance in Code Synthesis? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+D">Derek Xu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tong Xie</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+B">Botao Xia</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haoyu Li</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yunsheng Bai</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yizhou Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.02906v1-abstract-short" style="display: inline;"> Large language models (LLMs) have made significant strides at code generation through improved model design, training, and chain-of-thought. 
However, prompt-level optimizations remain an important yet under-explored aspect of LLMs for coding. This work focuses on the few-shot examples present in most code generation prompts, offering a systematic study on whether few-shot examples improve LLM's co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02906v1-abstract-full').style.display = 'inline'; document.getElementById('2412.02906v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.02906v1-abstract-full" style="display: none;"> Large language models (LLMs) have made significant strides at code generation through improved model design, training, and chain-of-thought. However, prompt-level optimizations remain an important yet under-explored aspect of LLMs for coding. This work focuses on the few-shot examples present in most code generation prompts, offering a systematic study on whether few-shot examples improve LLM's coding capabilities, which few-shot examples have the largest impact, and how to select impactful examples. Our work offers 2 approaches for selecting few-shot examples, a model-free method, CODEEXEMPLAR-FREE, and a model-based method, CODEEXEMPLAR-BASED. The 2 methods offer a trade-off between improved performance and reliance on training data and interpretability. Both methods significantly improve CodeLlama's coding ability across the popular HumanEval+ coding benchmark. In summary, our work provides valuable insights into how to pick few-shot examples in code generation prompts to improve LLM code generation capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02906v1-abstract-full').style.display = 'none'; document.getElementById('2412.02906v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01065">arXiv:2412.01065</a> <span> [<a href="https://arxiv.org/pdf/2412.01065">pdf</a>, <a href="https://arxiv.org/format/2412.01065">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Lookahead Counterfactual Fairness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zuo%2C+Z">Zhiqun Zuo</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xuwei Tan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueru Zhang</a>, <a href="/search/cs?searchtype=author&query=Khalili%2C+M+M">Mohammad Mahdi Khalili</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01065v1-abstract-short" style="display: inline;"> As machine learning (ML) algorithms are used in applications that involve humans, concerns have arisen that these algorithms may be biased against certain 
social groups. \textit{Counterfactual fairness} (CF) is a fairness notion proposed in Kusner et al. (2017) that measures the unfairness of ML predictions; it requires that the prediction perceived by an individual in the real world has the same… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01065v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01065v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01065v1-abstract-full" style="display: none;"> As machine learning (ML) algorithms are used in applications that involve humans, concerns have arisen that these algorithms may be biased against certain social groups. \textit{Counterfactual fairness} (CF) is a fairness notion proposed in Kusner et al. (2017) that measures the unfairness of ML predictions; it requires that the prediction perceived by an individual in the real world has the same marginal distribution as it would be in a counterfactual world, in which the individual belongs to a different group. Although CF ensures fair ML predictions, it fails to consider the downstream effects of ML predictions on individuals. Since humans are strategic and often adapt their behaviors in response to the ML system, predictions that satisfy CF may not lead to a fair future outcome for the individuals. In this paper, we introduce \textit{lookahead counterfactual fairness} (LCF), a fairness notion accounting for the downstream effects of ML models which requires the individual \textit{future status} to be counterfactually fair. We theoretically identify conditions under which LCF can be satisfied and propose an algorithm based on the theorems. We also extend the concept to path-dependent fairness. Experiments on both synthetic and real data validate the proposed method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01065v1-abstract-full').style.display = 'none'; document.getElementById('2412.01065v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18312">arXiv:2411.18312</a> <span> [<a href="https://arxiv.org/pdf/2411.18312">pdf</a>, <a href="https://arxiv.org/ps/2411.18312">ps</a>, <a href="https://arxiv.org/format/2411.18312">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Undirected 3-Fault Replacement Path in Nearly Cubic Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chi%2C+S">Shucheng Chi</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+R">Ran Duan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Benyu Wang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianle Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18312v1-abstract-short" style="display: inline;"> Given a graph $G=(V,E)$ and two vertices $s,t\in V$, the $f$-fault replacement path ($f$FRP) problem computes for every set of edges $F$ where $|F|\leq f$, the distance from $s$ to $t$ when edges in $F$ fail. A recent result shows that 2FRP in directed graphs can be solved in $\tilde{O}(n^3)$ time [arXiv:2209.07016]. 
In this paper, we show a 3FRP algorithm in deterministic $\tilde{O}(n^3)$ time fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18312v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18312v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18312v1-abstract-full" style="display: none;"> Given a graph $G=(V,E)$ and two vertices $s,t\in V$, the $f$-fault replacement path ($f$FRP) problem computes for every set of edges $F$ where $|F|\leq f$, the distance from $s$ to $t$ when edges in $F$ fail. A recent result shows that 2FRP in directed graphs can be solved in $\tilde{O}(n^3)$ time [arXiv:2209.07016]. In this paper, we show a 3FRP algorithm in deterministic $\tilde{O}(n^3)$ time for undirected weighted graphs, which almost matches the size of the output. This implies that $f$FRP in undirected graphs can be solved in almost optimal $\tilde{O}(n^f)$ time for all $f\geq 3$. To construct our 3FRP algorithm, we introduce an incremental distance sensitivity oracle (DSO) with $\tilde{O}(n^2)$ worst-case update time, while preprocessing time, space, and query time are still $\tilde{O}(n^3)$, $\tilde{O}(n^2)$ and $\tilde{O}(1)$, respectively, which match the static DSO [Bernstein and Karger 2009]. Here in a DSO, we can preprocess a graph so that the distance between any pair of vertices given any failed edge can be answered efficiently. From the recent result in [arXiv:2211.05178], we can obtain an offline dynamic DSO from the incremental worst-case DSO, which makes the construction of our 3FRP algorithm more convenient. By the offline dynamic DSO, we can also construct a 2-fault single-source replacement path (2-fault SSRP) algorithm in $\tilde{O}(n^3)$ time, that is, from a given vertex $s$, we want to find the distance to any vertex $t$ when any pair of edges fail. 
Thus the $\tilde{O}(n^3)$ time complexity for 2-fault SSRP is also almost optimal. Now we know that in undirected graphs 1FRP can be solved in $\tilde{O}(m)$ time [Nardelli, Proietti, Widmayer 2001], and 2FRP and 3FRP in undirected graphs can be solved in $\tilde{O}(n^3)$ time. In this paper, we also show that a truly subcubic algorithm for 2FRP in undirected graphs does not exist under APSP-hardness conjecture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18312v1-abstract-full').style.display = 'none'; document.getElementById('2411.18312v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17189">arXiv:2411.17189</a> <span> [<a href="https://arxiv.org/pdf/2411.17189">pdf</a>, <a href="https://arxiv.org/format/2411.17189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PhysMotion: Physics-Grounded Dynamics From a Single Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xiyang Tan</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Ying Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Zong%2C+Z">Zeshun Zong</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyi Xie</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yin Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17189v2-abstract-short" style="display: inline;"> We introduce PhysMotion, a novel framework that leverages principled physics-based simulations to guide intermediate 3D representations generated from a single image and input conditions (e.g., applied force and torque), producing high-quality, physically plausible video generation. By utilizing continuum mechanics-based simulations as a prior knowledge, our approach addresses the limitations of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17189v2-abstract-full').style.display = 'inline'; document.getElementById('2411.17189v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17189v2-abstract-full" style="display: none;"> We introduce PhysMotion, a novel framework that leverages principled physics-based simulations to guide intermediate 3D representations generated from a single image and input conditions (e.g., applied force and torque), producing high-quality, physically plausible video generation. By utilizing continuum mechanics-based simulations as a prior knowledge, our approach addresses the limitations of traditional data-driven generative models and result in more consistent physically plausible motions. Our framework begins by reconstructing a feed-forward 3D Gaussian from a single image through geometry optimization. This representation is then time-stepped using a differentiable Material Point Method (MPM) with continuum mechanics-based elastoplasticity models, which provides a strong foundation for realistic dynamics, albeit at a coarse level of detail. 
To enhance the geometry, appearance and ensure spatiotemporal consistency, we refine the initial simulation using a text-to-image (T2I) diffusion model with cross-frame attention, resulting in a physically plausible video that retains intricate details comparable to the input image. We conduct comprehensive qualitative and quantitative evaluations to validate the efficacy of our method. Our project page is available at: https://supertan0204.github.io/physmotion_website/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17189v2-abstract-full').style.display = 'none'; document.getElementById('2411.17189v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://supertan0204.github.io/physmotion_website/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12168">arXiv:2411.12168</a> <span> [<a href="https://arxiv.org/pdf/2411.12168">pdf</a>, <a href="https://arxiv.org/format/2411.12168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Sketch-guided Cage-based 3D Gaussian Splatting Deformation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianhao Xie</a>, <a href="/search/cs?searchtype=author&query=Aigerman%2C+N">Noam Aigerman</a>, <a href="/search/cs?searchtype=author&query=Belilovsky%2C+E">Eugene Belilovsky</a>, <a href="/search/cs?searchtype=author&query=Popa%2C+T">Tiberiu Popa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12168v2-abstract-short" style="display: inline;"> 3D Gaussian Splatting (GS) is one of the most promising novel 3D representations that has received great interest in computer graphics and computer vision. While various systems have introduced editing capabilities for 3D GS, such as those guided by text prompts, fine-grained control over deformation remains an open challenge. 
In this work, we present a novel sketch-guided 3D GS deformation system… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12168v2-abstract-full').style.display = 'inline'; document.getElementById('2411.12168v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12168v2-abstract-full" style="display: none;"> 3D Gaussian Splatting (GS) is one of the most promising novel 3D representations that has received great interest in computer graphics and computer vision. While various systems have introduced editing capabilities for 3D GS, such as those guided by text prompts, fine-grained control over deformation remains an open challenge. In this work, we present a novel sketch-guided 3D GS deformation system that allows users to intuitively modify the geometry of a 3D GS model by drawing a silhouette sketch from a single viewpoint. Our approach introduces a new deformation method that combines cage-based deformations with a variant of Neural Jacobian Fields, enabling precise, fine-grained control. Additionally, it leverages large-scale 2D diffusion priors and ControlNet to ensure the generated deformations are semantically plausible. Through a series of experiments, we demonstrate the effectiveness of our method and showcase its ability to animate static 3D GS models as one of its key applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12168v2-abstract-full').style.display = 'none'; document.getElementById('2411.12168v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 9 figures, project page: https://tianhaoxie.github.io/project/gs_deform/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12136">arXiv:2411.12136</a> <span> [<a href="https://arxiv.org/pdf/2411.12136">pdf</a>, <a href="https://arxiv.org/format/2411.12136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Visualizing Loss Functions as Topological Landscape Profiles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Geniesse%2C+C">Caleb Geniesse</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiaqing Chen</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tiankai Xie</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+G">Ge Shi</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yaoqing Yang</a>, <a href="/search/cs?searchtype=author&query=Morozov%2C+D">Dmitriy Morozov</a>, <a href="/search/cs?searchtype=author&query=Perciano%2C+T">Talita Perciano</a>, <a href="/search/cs?searchtype=author&query=Mahoney%2C+M+W">Michael W. Mahoney</a>, <a href="/search/cs?searchtype=author&query=Maciejewski%2C+R">Ross Maciejewski</a>, <a href="/search/cs?searchtype=author&query=Weber%2C+G+H">Gunther H. 
Weber</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12136v1-abstract-short" style="display: inline;"> In machine learning, a loss function measures the difference between model predictions and ground-truth (or target) values. For neural network models, visualizing how this loss changes as model parameters are varied can provide insights into the local structure of the so-called loss landscape (e.g., smoothness) as well as global properties of the underlying model (e.g., generalization performance)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12136v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12136v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12136v1-abstract-full" style="display: none;"> In machine learning, a loss function measures the difference between model predictions and ground-truth (or target) values. For neural network models, visualizing how this loss changes as model parameters are varied can provide insights into the local structure of the so-called loss landscape (e.g., smoothness) as well as global properties of the underlying model (e.g., generalization performance). While various methods for visualizing the loss landscape have been proposed, many approaches limit sampling to just one or two directions, ignoring potentially relevant information in this extremely high-dimensional space. This paper introduces a new representation based on topological data analysis that enables the visualization of higher-dimensional loss landscapes. 
After describing this new topological landscape profile representation, we show how the shape of loss landscapes can reveal new details about model performance and learning dynamics, highlighting several use cases, including image segmentation (e.g., UNet) and scientific machine learning (e.g., physics-informed neural networks). Through these examples, we provide new insights into how loss landscapes vary across distinct hyperparameter spaces: we find that the topology of the loss landscape is simpler for better-performing models; and we observe greater variation in the shape of loss landscapes near transitions from low to high model performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12136v1-abstract-full').style.display = 'none'; document.getElementById('2411.12136v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12000">arXiv:2411.12000</a> <span> [<a href="https://arxiv.org/pdf/2411.12000">pdf</a>, <a href="https://arxiv.org/format/2411.12000">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ByteScience: Bridging Unstructured Scientific Literature and Structured Data with Auto Fine-tuned Large Language Model in Token Granularity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tong Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hanzhi Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shaozhou Wang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yuwei Wan</a>, <a href="/search/cs?searchtype=author&query=Razzak%2C+I">Imran Razzak</a>, <a href="/search/cs?searchtype=author&query=Kit%2C+C">Chunyu Kit</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Hoex%2C+B">Bram Hoex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12000v2-abstract-short" style="display: inline;"> Natural Language Processing (NLP) is widely used to supply summarization ability from long context to structured information. However, extracting structured knowledge from scientific text by NLP models remains a challenge because of its domain-specific nature to complex data preprocessing and the granularity of multi-layered device-level information. 
To address this, we introduce ByteScience, a no… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12000v2-abstract-full').style.display = 'inline'; document.getElementById('2411.12000v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12000v2-abstract-full" style="display: none;"> Natural Language Processing (NLP) is widely used to supply summarization ability from long context to structured information. However, extracting structured knowledge from scientific text by NLP models remains a challenge because of its domain-specific nature to complex data preprocessing and the granularity of multi-layered device-level information. To address this, we introduce ByteScience, a non-profit cloud-based auto fine-tuned Large Language Model (LLM) platform, which is designed to extract structured scientific data and synthesize new scientific knowledge from vast scientific corpora. The platform capitalizes on DARWIN, an open-source, fine-tuned LLM dedicated to natural science. The platform was built on Amazon Web Services (AWS) and provides an automated, user-friendly workflow for custom model development and data extraction. The platform achieves remarkable accuracy with only a small amount of well-annotated articles. This innovative tool streamlines the transition from the science literature to structured knowledge and data and benefits the advancements in natural informatics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12000v2-abstract-full').style.display = 'none'; document.getElementById('2411.12000v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11148">arXiv:2411.11148</a> <span> [<a href="https://arxiv.org/pdf/2411.11148">pdf</a>, <a href="https://arxiv.org/format/2411.11148">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TabDeco: A Comprehensive Contrastive Framework for Decoupled Representations in Tabular Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Suiyao Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jing Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yunxiao Wang</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+C">Cheng Ji</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianpei Xie</a>, <a href="/search/cs?searchtype=author&query=Cociorva%2C+D">Daniel Cociorva</a>, <a href="/search/cs?searchtype=author&query=Sharps%2C+M">Michael Sharps</a>, <a href="/search/cs?searchtype=author&query=Levasseur%2C+C">Cecile Levasseur</a>, <a href="/search/cs?searchtype=author&query=Brunzell%2C+H">Hakan Brunzell</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11148v1-abstract-short" style="display: inline;"> Representation learning is a fundamental aspect of modern artificial intelligence, driving substantial improvements across diverse applications. While selfsupervised contrastive learning has led to significant advancements in fields like computer vision and natural language processing, its adaptation to tabular data presents unique challenges. Traditional approaches often prioritize optimizing mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11148v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11148v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11148v1-abstract-full" style="display: none;"> Representation learning is a fundamental aspect of modern artificial intelligence, driving substantial improvements across diverse applications. While selfsupervised contrastive learning has led to significant advancements in fields like computer vision and natural language processing, its adaptation to tabular data presents unique challenges. Traditional approaches often prioritize optimizing model architecture and loss functions but may overlook the crucial task of constructing meaningful positive and negative sample pairs from various perspectives like feature interactions, instance-level patterns and batch-specific contexts. To address these challenges, we introduce TabDeco, a novel method that leverages attention-based encoding strategies across both rows and columns and employs contrastive learning framework to effectively disentangle feature representations at multiple levels, including features, instances and data batches. 
With the innovative feature decoupling hierarchies, TabDeco consistently surpasses existing deep learning methods and leading gradient boosting algorithms, including XG-Boost, CatBoost, and LightGBM, across various benchmark tasks, underscoring its effectiveness in advancing tabular data representation learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11148v1-abstract-full').style.display = 'none'; document.getElementById('2411.11148v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11006">arXiv:2411.11006</a> <span> [<a href="https://arxiv.org/pdf/2411.11006">pdf</a>, <a href="https://arxiv.org/format/2411.11006">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> BackdoorMBTI: A Backdoor Learning Multimodal Benchmark Tool Kit for Backdoor Defense Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+H">Haiyang Yu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+J">Jiaping Gui</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+P">Ping Yi</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yue Wu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11006v1-abstract-short" style="display: inline;"> We introduce BackdoorMBTI, the first backdoor learning toolkit and benchmark designed for multimodal evaluation across three representative modalities from eleven commonly used datasets. BackdoorMBTI provides a systematic backdoor learning pipeline, encompassing data processing, data poisoning, backdoor training, and evaluation. The generated poison datasets and backdoor models enable detailed eva… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11006v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11006v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11006v1-abstract-full" style="display: none;"> We introduce BackdoorMBTI, the first backdoor learning toolkit and benchmark designed for multimodal evaluation across three representative modalities from eleven commonly used datasets. BackdoorMBTI provides a systematic backdoor learning pipeline, encompassing data processing, data poisoning, backdoor training, and evaluation. The generated poison datasets and backdoor models enable detailed evaluation of backdoor defense methods. Given the diversity of modalities, BackdoorMBTI facilitates systematic evaluation across different data types. Furthermore, BackdoorMBTI offers a standardized approach to handling practical factors in backdoor learning, such as issues related to data quality and erroneous labels. We anticipate that BackdoorMBTI will expedite future research in backdoor defense methods within a multimodal context. Code is available at https://anonymous.4open.science/r/BackdoorMBTI-D6A1/README.md. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11006v1-abstract-full').style.display = 'none'; document.getElementById('2411.11006v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09807">arXiv:2411.09807</a> <span> [<a href="https://arxiv.org/pdf/2411.09807">pdf</a>, <a href="https://arxiv.org/format/2411.09807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Loss Landscapes from a Topology Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tiankai Xie</a>, <a href="/search/cs?searchtype=author&query=Geniesse%2C+C">Caleb Geniesse</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiaqing Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yaoqing Yang</a>, <a href="/search/cs?searchtype=author&query=Morozov%2C+D">Dmitriy Morozov</a>, <a href="/search/cs?searchtype=author&query=Mahoney%2C+M+W">Michael W. Mahoney</a>, <a href="/search/cs?searchtype=author&query=Maciejewski%2C+R">Ross Maciejewski</a>, <a href="/search/cs?searchtype=author&query=Weber%2C+G+H">Gunther H. 
Weber</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09807v1-abstract-short" style="display: inline;"> Characterizing the loss of a neural network with respect to model parameters, i.e., the loss landscape, can provide valuable insights into properties of that model. Various methods for visualizing loss landscapes have been proposed, but less emphasis has been placed on quantifying and extracting actionable and reproducible insights from these complex representations. Inspired by powerful tools fro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09807v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09807v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09807v1-abstract-full" style="display: none;"> Characterizing the loss of a neural network with respect to model parameters, i.e., the loss landscape, can provide valuable insights into properties of that model. Various methods for visualizing loss landscapes have been proposed, but less emphasis has been placed on quantifying and extracting actionable and reproducible insights from these complex representations. Inspired by powerful tools from topological data analysis (TDA) for summarizing the structure of high-dimensional data, here we characterize the underlying shape (or topology) of loss landscapes, quantifying the topology to reveal new insights about neural networks. To relate our findings to the machine learning (ML) literature, we compute simple performance metrics (e.g., accuracy, error), and we characterize the local structure of loss landscapes using Hessian-based metrics (e.g., largest eigenvalue, trace, eigenvalue spectral density). 
Following this approach, we study established models from image pattern recognition (e.g., ResNets) and scientific ML (e.g., physics-informed neural networks), and we show how quantifying the shape of loss landscapes can provide new insights into model performance and learning dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09807v1-abstract-full').style.display = 'none'; document.getElementById('2411.09807v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06774">arXiv:2411.06774</a> <span> [<a href="https://arxiv.org/pdf/2411.06774">pdf</a>, <a href="https://arxiv.org/format/2411.06774">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> The First Prompt Counts the Most! 
An Evaluation of Large Language Models on Iterative Example-based Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yingjie Fu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bozhou Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyi Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wentao Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06774v1-abstract-short" style="display: inline;"> The capabilities of Large Language Models (LLMs) in code generation, particularly for implementing target functionalities from natural language descriptions, have been extensively studied. As an alternative form of natural language, input-output examples (I/O examples) provide an accessible, unambiguous, and flexible way to describe functionalities, but the diversity, sparseness, and incompletenes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06774v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06774v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06774v1-abstract-full" style="display: none;"> The capabilities of Large Language Models (LLMs) in code generation, particularly for implementing target functionalities from natural language descriptions, have been extensively studied. As an alternative form of natural language, input-output examples (I/O examples) provide an accessible, unambiguous, and flexible way to describe functionalities, but the diversity, sparseness, and incompleteness of I/O examples also place challenges on understanding and implementing requirements. 
Therefore, generating code from input-output examples (i.e., example-based code generation) provides a new perspective, allowing us to evaluate LLMs' capability to infer target functionalities from limited information and to process new-form requirements. However, related research about LLMs in example-based code generation remains largely unexplored. To fill this gap, this paper presents the first comprehensive study on example-based code generation using LLMs. To address the incorrectness caused by the incompleteness of I/O examples, we adopt an iterative evaluation framework and formalize the objective of example-based code generation as two sequential sub-objectives: generating code conforming to given examples and generating code that successfully implements the target functionalities from (iteratively) given examples. We assess six state-of-the-art LLMs using a new benchmark of 168 diverse target functionalities. The results demonstrate that when requirements were described using iterative I/O examples rather than natural language, the LLMs' score decreased by over 60%, indicating that example-based code generation remains challenging for the evaluated LLMs. More interestingly, the vast majority (even over 95%) of successfully implemented functionalities are achieved in the first round of iterations, suggesting that the LLMs struggle to effectively utilize the iteratively supplemented requirements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06774v1-abstract-full').style.display = 'none'; document.getElementById('2411.06774v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06353">arXiv:2411.06353</a> <span> [<a href="https://arxiv.org/pdf/2411.06353">pdf</a>, <a href="https://arxiv.org/format/2411.06353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Active Learning in the Open World </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jifan Zhang</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+H">Haoyue Bai</a>, <a href="/search/cs?searchtype=author&query=Nowak%2C+R">Robert Nowak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06353v1-abstract-short" style="display: inline;"> Machine learning models deployed in open-world scenarios often encounter unfamiliar conditions and perform poorly in unanticipated situations. As AI systems advance and find application in safety-critical domains, effectively handling out-of-distribution (OOD) data is crucial to building open-world learning systems. 
In this work, we introduce ALOE, a novel active learning algorithm for open-world… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06353v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06353v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06353v1-abstract-full" style="display: none;"> Machine learning models deployed in open-world scenarios often encounter unfamiliar conditions and perform poorly in unanticipated situations. As AI systems advance and find application in safety-critical domains, effectively handling out-of-distribution (OOD) data is crucial to building open-world learning systems. In this work, we introduce ALOE, a novel active learning algorithm for open-world environments designed to enhance model adaptation by incorporating new OOD classes via a two-stage approach. First, diversity sampling selects a representative set of examples, followed by energy-based OOD detection to prioritize likely unknown classes for annotation. This strategy accelerates class discovery and learning, even under constrained annotation budgets. Evaluations on three long-tailed image classification benchmarks demonstrate that ALOE outperforms traditional active learning baselines, effectively expanding known categories while balancing annotation cost. Our findings reveal a crucial tradeoff between enhancing known-class performance and discovering new classes, setting the stage for future advancements in open-world machine learning. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06353v1-abstract-full').style.display = 'none'; document.getElementById('2411.06353v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02397">arXiv:2411.02397</a> <span> [<a href="https://arxiv.org/pdf/2411.02397">pdf</a>, <a href="https://arxiv.org/format/2411.02397">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Caching for Faster Video Generation with Diffusion Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kahatapitiya%2C+K">Kumara Kahatapitiya</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haozhe Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Sen He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Ding Liu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+M">Menglin Jia</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chenyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Ryoo%2C+M+S">Michael S. 
Ryoo</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02397v2-abstract-short" style="display: inline;"> Generating temporally-consistent high-fidelity videos can be computationally expensive, especially over longer temporal spans. More-recent Diffusion Transformers (DiTs) -- despite making significant headway in this context -- have only heightened such challenges as they rely on larger models and heavier attention mechanisms, resulting in slower inference speeds. In this paper, we introduce a train… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02397v2-abstract-full').style.display = 'inline'; document.getElementById('2411.02397v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02397v2-abstract-full" style="display: none;"> Generating temporally-consistent high-fidelity videos can be computationally expensive, especially over longer temporal spans. More-recent Diffusion Transformers (DiTs) -- despite making significant headway in this context -- have only heightened such challenges as they rely on larger models and heavier attention mechanisms, resulting in slower inference speeds. In this paper, we introduce a training-free method to accelerate video DiTs, termed Adaptive Caching (AdaCache), which is motivated by the fact that "not all videos are created equal": meaning, some videos require fewer denoising steps to attain a reasonable quality than others. Building on this, we not only cache computations through the diffusion process, but also devise a caching schedule tailored to each video generation, maximizing the quality-latency trade-off. 
We further introduce a Motion Regularization (MoReg) scheme to utilize video information within AdaCache, essentially controlling the compute allocation based on motion content. Altogether, our plug-and-play contributions grant significant inference speedups (e.g. up to 4.7x on Open-Sora 720p - 2s video generation) without sacrificing the generation quality, across multiple video DiT baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02397v2-abstract-full').style.display = 'none'; document.getElementById('2411.02397v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project-page is available at https://adacache-dit.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18603">arXiv:2410.18603</a> <span> [<a href="https://arxiv.org/pdf/2410.18603">pdf</a>, <a href="https://arxiv.org/format/2410.18603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> AgentStore: Scalable Integration of Heterogeneous Agents As Specialized Generalist Computer Assistant </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+C">Chengyou Jia</a>, <a 
href="/search/cs?searchtype=author&query=Luo%2C+M">Minnan Luo</a>, <a href="/search/cs?searchtype=author&query=Dang%2C+Z">Zhuohang Dang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qiushi Sun</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fangzhi Xu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Junlin Hu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianbao Xie</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhiyong Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18603v1-abstract-short" style="display: inline;"> Digital agents capable of automating complex computer tasks have attracted considerable attention due to their immense potential to enhance human-computer interaction. However, existing agent methods exhibit deficiencies in their generalization and specialization capabilities, especially in handling open-ended computer tasks in real-world environments. Inspired by the rich functionality of the App… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18603v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18603v1-abstract-full" style="display: none;"> Digital agents capable of automating complex computer tasks have attracted considerable attention due to their immense potential to enhance human-computer interaction. However, existing agent methods exhibit deficiencies in their generalization and specialization capabilities, especially in handling open-ended computer tasks in real-world environments. Inspired by the rich functionality of the App store, we present AgentStore, a scalable platform designed to dynamically integrate heterogeneous agents for automating computer tasks. 
AgentStore empowers users to integrate third-party agents, allowing the system to continuously enrich its capabilities and adapt to rapidly evolving operating systems. Additionally, we propose a novel core <strong>MetaAgent</strong> with the <strong>AgentToken</strong> strategy to efficiently manage diverse agents and utilize their specialized and generalist abilities for both domain-specific and system-wide tasks. Extensive experiments on three challenging benchmarks demonstrate that AgentStore surpasses the limitations of previous systems with narrow capabilities, particularly achieving a significant improvement from 11.21% to 23.85% on the OSWorld benchmark, more than doubling the previous results. Comprehensive quantitative and qualitative results further demonstrate AgentStore's ability to enhance agent systems in both generalization and specialization, underscoring its potential for developing the specialized generalist computer assistant. All our codes will be made publicly available in https://chengyou-jia.github.io/AgentStore-Home. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18603v1-abstract-full').style.display = 'none'; document.getElementById('2410.18603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18075">arXiv:2410.18075</a> <span> [<a href="https://arxiv.org/pdf/2410.18075">pdf</a>, <a href="https://arxiv.org/format/2410.18075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> ProFL: Performative Robust Optimal Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xue Zheng</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xuwei Tan</a>, <a href="/search/cs?searchtype=author&query=Yener%2C+A">Aylin Yener</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueru Zhang</a>, <a href="/search/cs?searchtype=author&query=Payani%2C+A">Ali Payani</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M">Myungjin Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18075v1-abstract-short" style="display: inline;"> Performative prediction (PP) is a framework that captures distribution shifts that occur during the training of machine learning models due to their deployment. As the trained model is used, its generated data could cause the model to evolve, leading to deviations from the original data distribution. 
The impact of such model-induced distribution shifts in the federated learning (FL) setup remains… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18075v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18075v1-abstract-full" style="display: none;"> Performative prediction (PP) is a framework that captures distribution shifts that occur during the training of machine learning models due to their deployment. As the trained model is used, its generated data could cause the model to evolve, leading to deviations from the original data distribution. The impact of such model-induced distribution shifts in the federated learning (FL) setup remains unexplored despite being increasingly likely to transpire in real-life use cases. Although Jin et al. (2024) recently extended PP to FL in a straightforward manner, the resulting model only converges to a performative stable point, which may be far from optimal. The methods in Izzo et al. (2021); Miller et al. (2021) can find a performative optimal point in centralized settings, but they require the performative risk to be convex and the training data to be noiseless, assumptions often violated in realistic FL systems. This paper overcomes all of these shortcomings and proposes Performative robust optimal Federated Learning (ProFL), an algorithm that finds performative optimal points in FL from noisy and contaminated data. We present the convergence analysis under the Polyak-Lojasiewicz condition, which applies to non-convex objectives. Extensive experiments on multiple datasets validate our proposed algorithms' efficiency. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18075v1-abstract-full').style.display = 'none'; document.getElementById('2410.18075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages with Appendix, 18 figures. The paper has been submitted and is currently under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16165">arXiv:2410.16165</a> <span> [<a href="https://arxiv.org/pdf/2410.16165">pdf</a>, <a href="https://arxiv.org/format/2410.16165">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> From Tokens to Materials: Leveraging Language Models for Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yuwei Wan</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tong Xie</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+N">Nan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Kit%2C+C">Chunyu Kit</a>, <a href="/search/cs?searchtype=author&query=Hoex%2C+B">Bram Hoex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2410.16165v2-abstract-short" style="display: inline;"> Exploring the predictive capabilities of language models in material science is an ongoing interest. This study investigates the application of language model embeddings to enhance material property prediction in materials science. By evaluating various contextual embedding methods and pre-trained models, including Bidirectional Encoder Representations from Transformers (BERT) and Generative Pre-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16165v2-abstract-full').style.display = 'inline'; document.getElementById('2410.16165v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16165v2-abstract-full" style="display: none;"> Exploring the predictive capabilities of language models in material science is an ongoing interest. This study investigates the application of language model embeddings to enhance material property prediction in materials science. By evaluating various contextual embedding methods and pre-trained models, including Bidirectional Encoder Representations from Transformers (BERT) and Generative Pre-trained Transformers (GPT), we demonstrate that domain-specific models, particularly MatBERT significantly outperform general-purpose models in extracting implicit knowledge from compound names and material properties. Our findings reveal that information-dense embeddings from the third layer of MatBERT, combined with a context-averaging approach, offer the most effective method for capturing material-property relationships from the scientific literature. We also identify a crucial "tokenizer effect," highlighting the importance of specialized text processing techniques that preserve complete compound names while maintaining consistent token counts. 
These insights underscore the value of domain-specific training and tokenization in materials science applications and offer a promising pathway for accelerating the discovery and development of new materials through AI-driven approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16165v2-abstract-full').style.display = 'none'; document.getElementById('2410.16165v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15756">arXiv:2410.15756</a> <span> [<a href="https://arxiv.org/pdf/2410.15756">pdf</a>, <a href="https://arxiv.org/format/2410.15756">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Automated Proof Generation for Rust Code via Self-Evolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianyu Chen</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Shuai Lu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Shan Lu</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yeyun Gong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chenyuan Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuheng Li</a>, <a href="/search/cs?searchtype=author&query=Misu%2C+M+R+H">Md Rakib 
Hossain Misu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hao Yu</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+N">Nan Duan</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+P">Peng Cheng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&query=Lahiri%2C+S+K">Shuvendu K Lahiri</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Lidong Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15756v1-abstract-short" style="display: inline;"> Ensuring correctness is crucial for code generation. Formal verification offers a definitive assurance of correctness, but demands substantial human effort in proof construction and hence raises a pressing need for automation. The primary obstacle lies in the severe lack of data - there is much less proof than code for LLMs to train upon. In this paper, we introduce SAFE, a novel framework that ov… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15756v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15756v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15756v1-abstract-full" style="display: none;"> Ensuring correctness is crucial for code generation. Formal verification offers a definitive assurance of correctness, but demands substantial human effort in proof construction and hence raises a pressing need for automation. The primary obstacle lies in the severe lack of data - there is much less proof than code for LLMs to train upon. In this paper, we introduce SAFE, a novel framework that overcomes the lack of human-written proof to enable automated proof generation of Rust code. 
SAFE establishes a self-evolving cycle where data synthesis and fine-tuning collaborate to enhance the model capability, leveraging the definitive power of a symbolic verifier in telling correct proof from incorrect ones. SAFE also re-purposes the large number of synthesized incorrect proofs to train the self-debugging capability of the fine-tuned models, empowering them to fix incorrect proofs based on the verifier's feedback. SAFE demonstrates superior efficiency and precision compared to GPT-4o. Through tens of thousands of synthesized proofs and the self-debugging mechanism, we improve the capability of open-source models, initially unacquainted with formal verification, to automatically write proof for Rust code. This advancement leads to a significant improvement in performance, achieving a 70.50% accuracy rate in a benchmark crafted by human experts, a significant leap over GPT-4o's performance of 24.46%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15756v1-abstract-full').style.display = 'none'; document.getElementById('2410.15756v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15336">arXiv:2410.15336</a> <span> [<a href="https://arxiv.org/pdf/2410.15336">pdf</a>, <a href="https://arxiv.org/format/2410.15336">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diffusion-PINN Sampler </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Zhekun Shi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Longlin Yu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyu Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Cheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15336v1-abstract-short" style="display: inline;"> Recent success of diffusion models has inspired a surge of interest in developing sampling techniques using reverse diffusion processes. However, accurately estimating the drift term in the reverse stochastic differential equation (SDE) solely from the unnormalized target density poses significant challenges, hindering existing methods from achieving state-of-the-art performance. 
In this paper, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15336v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15336v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15336v1-abstract-full" style="display: none;"> Recent success of diffusion models has inspired a surge of interest in developing sampling techniques using reverse diffusion processes. However, accurately estimating the drift term in the reverse stochastic differential equation (SDE) solely from the unnormalized target density poses significant challenges, hindering existing methods from achieving state-of-the-art performance. In this paper, we introduce the Diffusion-PINN Sampler (DPS), a novel diffusion-based sampling algorithm that estimates the drift term by solving the governing partial differential equation of the log-density of the underlying SDE marginals via physics-informed neural networks (PINN). We prove that the error of log-density approximation can be controlled by the PINN residual loss, enabling us to establish convergence guarantees of DPS. Experiments on a variety of sampling tasks demonstrate the effectiveness of our approach, particularly in accurately identifying mixing proportions when the target contains isolated components. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15336v1-abstract-full').style.display = 'none'; document.getElementById('2410.15336v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15332">arXiv:2410.15332</a> <span> [<a href="https://arxiv.org/pdf/2410.15332">pdf</a>, <a href="https://arxiv.org/format/2410.15332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> EPIC: Efficient Position-Independent Context Caching for Serving Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+J">Junhao Hu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wenrui Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyi Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weidong Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+T">Tiancheng Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qin Zhang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+H">Hao Feng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xusheng Chen</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Y">Yizhou Shan</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2410.15332v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) are critical for a wide range of applications, but serving them efficiently becomes increasingly challenging as inputs become more complex. Context caching improves serving performance by exploiting inter-request dependency and reusing key-value (KV) cache across requests, thus improving time-to-first-token (TTFT). However, existing prefix-based context caching require… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15332v2-abstract-full').style.display = 'inline'; document.getElementById('2410.15332v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15332v2-abstract-full" style="display: none;"> Large Language Models (LLMs) are critical for a wide range of applications, but serving them efficiently becomes increasingly challenging as inputs become more complex. Context caching improves serving performance by exploiting inter-request dependency and reusing key-value (KV) cache across requests, thus improving time-to-first-token (TTFT). However, existing prefix-based context caching requires exact token prefix matches, limiting cache reuse in few-shot learning, multi-document QA, or retrieval-augmented generation, where prefixes may vary. In this paper, we present EPIC, an LLM serving system that introduces position-independent context caching (PIC), enabling modular KV cache reuse regardless of token chunk position (or prefix). EPIC features two key designs: AttnLink, which leverages static attention sparsity to minimize recomputation for accuracy recovery, and KVSplit, a customizable chunking method that preserves semantic coherence. Our experiments demonstrate that Epic delivers up to 8x improvements in TTFT and 7x throughput over existing systems, with negligible or no accuracy loss. 
By addressing the limitations of traditional caching approaches, Epic enables more scalable and efficient LLM inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15332v2-abstract-full').style.display = 'none'; document.getElementById('2410.15332v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11765">arXiv:2410.11765</a> <span> [<a href="https://arxiv.org/pdf/2410.11765">pdf</a>, <a href="https://arxiv.org/format/2410.11765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ECGN: A Cluster-Aware Approach to Graph Neural Networks for Imbalanced Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thapaliya%2C+B">Bishal Thapaliya</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+A">Anh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yao Lu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tian Xie</a>, <a href="/search/cs?searchtype=author&query=Grudetskyi%2C+I">Igor Grudetskyi</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fudong Lin</a>, <a href="/search/cs?searchtype=author&query=Valkanas%2C+A">Antonios Valkanas</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingyu Liu</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+D">Deepayan Chakraborty</a>, <a 
href="/search/cs?searchtype=author&query=Fehri%2C+B">Bilel Fehri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11765v1-abstract-short" style="display: inline;"> Classifying nodes in a graph is a common problem. The ideal classifier must adapt to any imbalances in the class distribution. It must also use information in the clustering structure of real-world graphs. Existing Graph Neural Networks (GNNs) have not addressed both problems together. We propose the Enhanced Cluster-aware Graph Network (ECGN), a novel method that addresses these issues by integra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11765v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11765v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11765v1-abstract-full" style="display: none;"> Classifying nodes in a graph is a common problem. The ideal classifier must adapt to any imbalances in the class distribution. It must also use information in the clustering structure of real-world graphs. Existing Graph Neural Networks (GNNs) have not addressed both problems together. We propose the Enhanced Cluster-aware Graph Network (ECGN), a novel method that addresses these issues by integrating cluster-specific training with synthetic node generation. Unlike traditional GNNs that apply the same node update process for all nodes, ECGN learns different aggregations for different clusters. We also use the clusters to generate new minority-class nodes in a way that helps clarify the inter-class decision boundary. By combining cluster-aware embeddings with a global integration step, ECGN enhances the quality of the resulting node embeddings. Our method works with any underlying GNN and any cluster generation technique. 
Experimental results show that ECGN consistently outperforms its closest competitors by up to 11% on some widely studied benchmark datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11765v1-abstract-full').style.display = 'none'; document.getElementById('2410.11765v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08511">arXiv:2410.08511</a> <span> [<a href="https://arxiv.org/pdf/2410.08511">pdf</a>, <a href="https://arxiv.org/format/2410.08511">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Distributionally robust self-supervised learning for tabular data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghosh%2C+S">Shantanu Ghosh</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tiankang Xie</a>, <a href="/search/cs?searchtype=author&query=Kuznetsov%2C+M">Mikhail Kuznetsov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08511v5-abstract-short" style="display: inline;"> Machine learning (ML) models trained using Empirical Risk Minimization (ERM) often exhibit systematic errors on specific subpopulations of tabular data, 
known as error slices. Learning robust representation in presence of error slices is challenging, especially in self-supervised settings during the feature reconstruction phase, due to high cardinality features and the complexity of constructing e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08511v5-abstract-full').style.display = 'inline'; document.getElementById('2410.08511v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08511v5-abstract-full" style="display: none;"> Machine learning (ML) models trained using Empirical Risk Minimization (ERM) often exhibit systematic errors on specific subpopulations of tabular data, known as error slices. Learning robust representation in presence of error slices is challenging, especially in self-supervised settings during the feature reconstruction phase, due to high cardinality features and the complexity of constructing error sets. Traditional robust representation learning methods are largely focused on improving worst group performance in supervised setting in computer vision, leaving a gap in approaches tailored for tabular data. We address this gap by developing a framework to learn robust representation in tabular data during self-supervised pre-training. Our approach utilizes an encoder-decoder model trained with Masked Language Modeling (MLM) loss to learn robust latent representations. This paper applies the Just Train Twice (JTT) and Deep Feature Reweighting (DFR) methods during the pre-training phase for tabular data. These methods fine-tune the ERM pre-trained model by up-weighting error-prone samples or creating balanced datasets for specific categorical features. This results in specialized models for each feature, which are then used in an ensemble approach to enhance downstream classification performance. 
This methodology improves robustness across slices, thus enhancing overall generalization performance. Extensive experiments across various datasets demonstrate the efficacy of our approach. The code is available: \url{https://github.com/amazon-science/distributionally-robust-self-supervised-learning-for-tabular-data}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08511v5-abstract-full').style.display = 'none'; document.getElementById('2410.08511v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TRL Workshop@NeurIPS2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07588">arXiv:2410.07588</a> <span> [<a href="https://arxiv.org/pdf/2410.07588">pdf</a>, <a href="https://arxiv.org/format/2410.07588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Careful About What App Promotion Ads Recommend! 
Detecting and Explaining Malware Promotion via App Promotion Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shang Ma</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chaoran Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shao Yang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shifu Hou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T+J">Toby Jia-Jun Li</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xusheng Xiao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tao Xie</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yanfang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07588v2-abstract-short" style="display: inline;"> In Android apps, their developers frequently place app promotion ads, namely advertisements to promote other apps. Unfortunately, the inadequate vetting of ad content allows malicious developers to exploit app promotion ads as a new distribution channel for malware. To help detect malware distributed via app promotion ads, in this paper, we propose a novel approach, named ADGPE, that synergistical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07588v2-abstract-full').style.display = 'inline'; document.getElementById('2410.07588v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07588v2-abstract-full" style="display: none;"> In Android apps, their developers frequently place app promotion ads, namely advertisements to promote other apps. Unfortunately, the inadequate vetting of ad content allows malicious developers to exploit app promotion ads as a new distribution channel for malware. 
To help detect malware distributed via app promotion ads, in this paper, we propose a novel approach, named ADGPE, that synergistically integrates app user interface (UI) exploration with graph learning to automatically collect app promotion ads, detect malware promoted by these ads, and explain the promotion mechanisms employed by the detected malware. Our evaluation on 18,627 app promotion ads demonstrates the substantial risks in the app promotion ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07588v2-abstract-full').style.display = 'none'; document.getElementById('2410.07588v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NDSS Symposium 2025 Accepted Papers</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07076">arXiv:2410.07076</a> <span> [<a href="https://arxiv.org/pdf/2410.07076">pdf</a>, <a href="https://arxiv.org/format/2410.07076">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MOOSE-Chem: Large Language Models for Rediscovering Unseen Chemistry Scientific Hypotheses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zonglin Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wanhao Liu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+B">Ben Gao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tong Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuqiang Li</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Poria%2C+S">Soujanya Poria</a>, <a href="/search/cs?searchtype=author&query=Cambria%2C+E">Erik Cambria</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+D">Dongzhan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07076v3-abstract-short" style="display: inline;"> Scientific discovery contributes largely to human society's prosperity, and 
recent progress shows that LLMs could potentially catalyze this process. However, it is still unclear whether LLMs can discover novel and valid hypotheses in chemistry. In this work, we investigate this central research question: Can LLMs automatically discover novel and valid chemistry research hypotheses given only a che… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07076v3-abstract-full').style.display = 'inline'; document.getElementById('2410.07076v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07076v3-abstract-full" style="display: none;"> Scientific discovery contributes largely to human society's prosperity, and recent progress shows that LLMs could potentially catalyze this process. However, it is still unclear whether LLMs can discover novel and valid hypotheses in chemistry. In this work, we investigate this central research question: Can LLMs automatically discover novel and valid chemistry research hypotheses given only a chemistry research background (consisting of a research question and/or a background survey), without limitation on the domain of the research question? After extensive discussions with chemistry experts, we propose an assumption that a majority of chemistry hypotheses can be resulted from a research background and several inspirations. With this key insight, we break the central question into three smaller fundamental questions. In brief, they are: (1) given a background question, whether LLMs can retrieve good inspirations; (2) with background and inspirations, whether LLMs can lead to hypothesis; and (3) whether LLMs can identify good hypotheses to rank them higher. To investigate these questions, we construct a benchmark consisting of 51 chemistry papers published in Nature, Science, or a similar level in 2024 (all papers are only available online since 2024). 
Every paper is divided by chemistry PhD students into three components: background, inspirations, and hypothesis. The goal is to rediscover the hypothesis, given only the background and a large randomly selected chemistry literature corpus consisting the ground truth inspiration papers, with LLMs trained with data up to 2023. We also develop an LLM-based multi-agent framework that leverages the assumption, consisting of three stages reflecting the three smaller questions. The proposed method can rediscover many hypotheses with very high similarity with the ground truth ones, covering the main innovations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07076v3-abstract-full').style.display = 'none'; document.getElementById('2410.07076v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code and Benchmark are available at https://github.com/ZonglinY/MOOSE-Chem.git</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xie%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Xie%2C+T&start=250" class="pagination-link " aria-label="Page 6">6 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div 
class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" 
role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 
0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>