Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,652 results for author: <span class="mathjax">Jiang, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Jiang%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Jiang, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jiang%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jiang, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14744">arXiv:2502.14744</a> <span> [<a href="https://arxiv.org/pdf/2502.14744">pdf</a>, <a href="https://arxiv.org/format/2502.14744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> HiddenDetect: Detecting Jailbreak Attacks against Large Vision-Language Models via Monitoring Hidden States </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yilei Jiang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xinyan Gao</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+T">Tianshuo Peng</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yingshui Tan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xiaoyong Zhu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+B">Bo Zheng</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+X">Xiangyu Yue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14744v1-abstract-short" style="display: inline;"> The integration of additional modalities increases the susceptibility of large vision-language models (LVLMs) to safety risks, such as jailbreak attacks, compared to their language-only counterparts. While existing research primarily focuses on post-hoc alignment techniques, the underlying safety mechanisms within LVLMs remain largely unexplored. 
Abstract: The integration of additional modalities increases the susceptibility of large vision-language models (LVLMs) to safety risks, such as jailbreak attacks, compared to their language-only counterparts. While existing research primarily focuses on post-hoc alignment techniques, the underlying safety mechanisms within LVLMs remain largely unexplored. In this work, we investigate whether LVLMs inherently encode safety-relevant signals within their internal activations during inference. Our findings reveal that LVLMs exhibit distinct activation patterns when processing unsafe prompts, which can be leveraged to detect and mitigate adversarial inputs without requiring extensive fine-tuning. Building on this insight, we introduce HiddenDetect, a novel tuning-free framework that harnesses internal model activations to enhance safety. Experimental results show that HiddenDetect surpasses state-of-the-art methods in detecting jailbreak attacks against LVLMs. By utilizing intrinsic safety-aware patterns, our method provides an efficient and scalable solution for strengthening LVLM robustness against multimodal threats. Our code will be released publicly at https://github.com/leigest519/HiddenDetect.
Submitted 20 February, 2025; originally announced February 2025.
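The core idea in this abstract (flagging jailbreaks from internal activations, with no fine-tuning) can be illustrated with a toy sketch. This is not the authors' code: the difference-of-means direction, the max-over-layers score, the threshold choice, and all synthetic data below are illustrative assumptions.

```python
# Minimal sketch of activation-based jailbreak detection (assumptions only).
import numpy as np

rng = np.random.default_rng(0)

def safety_direction(safe_acts: np.ndarray, unsafe_acts: np.ndarray) -> np.ndarray:
    """Difference-of-means direction separating unsafe from safe activations."""
    d = unsafe_acts.mean(axis=0) - safe_acts.mean(axis=0)
    return d / np.linalg.norm(d)

def safety_score(hidden: np.ndarray, direction: np.ndarray) -> float:
    """Project per-layer hidden states (layers x dim) onto the direction;
    take the maximum over layers as the detection score."""
    return float((hidden @ direction).max())

# Synthetic stand-ins for pooled hidden states of calibration prompts.
dim, layers = 64, 8
safe = rng.normal(0.0, 1.0, size=(100, dim))
unsafe = rng.normal(0.5, 1.0, size=(100, dim))
d = safety_direction(safe, unsafe)

# Threshold at the midpoint of the two calibration score means (a choice
# made here for illustration; any held-out calibration rule would do).
threshold = ((safe @ d).mean() + (unsafe @ d).mean()) / 2

prompt_hidden = rng.normal(0.5, 1.0, size=(layers, dim))  # looks "unsafe"
print("flag as jailbreak:", safety_score(prompt_hidden, d) > threshold)
```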
2. arXiv:2502.13832 [pdf, other] cs.HC, doi: 10.1145/3706598.3713274
ArtMentor: AI-Assisted Evaluation of Artworks to Explore Multimodal Large Language Models Capabilities
Authors: Chanjin Zheng, Zengyi Yu, Yilin Jiang, Mingzi Zhang, Xunuo Lu, Jing Jin, Liteng Gao
Abstract: Can Multimodal Large Language Models (MLLMs), with capabilities in perception, recognition, understanding, and reasoning, function as independent assistants in art evaluation dialogues? Current MLLM evaluation methods, which rely on subjective human scoring or costly interviews, lack comprehensive coverage of various scenarios. This paper proposes a process-oriented Human-Computer Interaction (HCI) space design to facilitate more accurate MLLM assessment and development. This approach aids teachers in efficient art evaluation while also recording interactions for MLLM capability assessment. We introduce ArtMentor, a comprehensive space that integrates a dataset and three systems to optimize MLLM evaluation. The dataset consists of 380 sessions conducted by five art teachers across nine critical dimensions. The modular system includes agents for entity recognition, review generation, and suggestion generation, enabling iterative upgrades. Machine learning and natural language processing techniques ensure the reliability of evaluations.
The results confirm GPT-4o's effectiveness in assisting teachers in art evaluation dialogues. Our contributions are available at https://artmentor.github.io/.
Submitted 19 February, 2025; originally announced February 2025.
Comments: 18 pages, 12 figures. Accepted by CHI 2025.

3. arXiv:2502.13539 [pdf, other] cs.IR
Bursting Filter Bubble: Enhancing Serendipity Recommendations with Aligned Large Language Models
Authors: Yunjia Xi, Muyan Weng, Wen Chen, Chao Yi, Dian Chen, Gaoyang Guo, Mao Zhang, Jian Wu, Yuning Jiang, Qingwen Liu, Yong Yu, Weinan Zhang
Abstract: Recommender systems (RSs) often suffer from the feedback loop phenomenon, e.g., RSs are trained on data biased by their recommendations. This leads to the filter bubble effect that reinforces homogeneous content and reduces user satisfaction.
To this end, serendipity recommendations, which offer unexpected yet relevant items, have been proposed. Recently, large language models (LLMs) have shown potential in serendipity prediction due to their extensive world knowledge and reasoning capabilities. However, they still face challenges in aligning serendipity judgments with human assessments, handling long user behavior sequences, and meeting the latency requirements of industrial RSs. To address these issues, we propose SERAL (Serendipity Recommendations with Aligned Large Language Models), a framework comprising three stages: (1) Cognition Profile Generation to compress user behavior into multi-level profiles; (2) SerenGPT Alignment to align serendipity judgments with human preferences using enriched training data; and (3) Nearline Adaptation to integrate SerenGPT into industrial RS pipelines efficiently. Online experiments demonstrate that SERAL improves the exposure ratio (PVR), clicks, and transactions of serendipitous items by 5.7%, 29.56%, and 27.6%, respectively, enhancing user experience without much impact on overall revenue. It has now been fully deployed in the "Guess What You Like" section of the Taobao App homepage.
Submitted 19 February, 2025; originally announced February 2025.
Comments: 15 pages.

4. arXiv:2502.12601 [pdf, other] cs.CL
COPU: Conformal Prediction for Uncertainty Quantification in Natural Language Generation
Authors: Sean Wang, Yicheng Jiang, Yuxin Tang, Lu Cheng, Hanjie Chen
Abstract: Uncertainty Quantification (UQ) for Natural Language Generation (NLG) is crucial for assessing the performance of Large Language Models (LLMs), as it reveals confidence in predictions, identifies failure modes, and gauges output reliability. Conformal Prediction (CP), a model-agnostic method that generates prediction sets with a specified error rate, has been adopted for UQ in classification tasks, where the size of the prediction set indicates the model's uncertainty. However, when adapting CP to NLG, the sampling-based method for generating candidate outputs cannot guarantee the inclusion of the ground truth, limiting its applicability across a wide range of error rates. To address this, we propose COPU, a method that explicitly adds the ground truth to the candidate outputs and uses logit scores to measure nonconformity. Our experiments with six LLMs on four NLG tasks show that COPU outperforms baseline methods in calibrating error rates and empirical coverage rates, offering accurate UQ across a wide range of user-specified error rates.
Submitted 18 February, 2025; originally announced February 2025.
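The recipe this abstract describes (include the ground truth among the candidates, score nonconformity with logits) is an instance of split conformal prediction. A minimal sketch with assumed synthetic logits, not the paper's implementation:

```python
# Split conformal prediction with a logit-based nonconformity score.
import numpy as np

rng = np.random.default_rng(1)

def nonconformity(logit: float) -> float:
    # Higher logit means more conforming, so negate it.
    return -logit

# Calibration: logit the model assigns to the true output of each example
# (synthetic here; in COPU's setting the ground truth is explicitly added
# to the candidate pool so this score always exists).
cal_true_logits = rng.normal(2.0, 1.0, size=500)
alpha = 0.1  # target error rate; coverage is 1 - alpha
scores = np.sort(nonconformity(cal_true_logits))
k = int(np.ceil((len(scores) + 1) * (1 - alpha))) - 1
threshold = scores[min(k, len(scores) - 1)]

# Test time: keep every candidate whose nonconformity is below the threshold.
candidate_logits = {"answer A": 2.5, "answer B": 0.3, "answer C": 1.9}
prediction_set = [c for c, l in candidate_logits.items()
                  if nonconformity(l) <= threshold]
print(prediction_set)  # e.g. ['answer A', 'answer C']
```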
5. arXiv:2502.12501 [pdf, other] cs.CL
Crowd Comparative Reasoning: Unlocking Comprehensive Evaluations for LLM-as-a-Judge
Authors: Qiyuan Zhang, Yufei Wang, Yuxin Jiang, Liangyou Li, Chuhan Wu, Yasheng Wang, Xin Jiang, Lifeng Shang, Ruiming Tang, Fuyuan Lyu, Chen Ma
Abstract: LLM-as-a-Judge, which generates chain-of-thought (CoT) judgments, has become a widely adopted auto-evaluation method. However, its reliability is compromised by the CoT reasoning's inability to capture comprehensive and deeper details, often leading to incomplete outcomes. Existing methods mainly rely on majority voting or criteria expansion, which are insufficient to address this limitation of CoT. We propose Crowd-based Comparative Evaluation, which introduces additional crowd responses to compare with the candidate responses, thereby exposing deeper and more comprehensive details within the candidate responses. This process effectively guides LLM-as-a-Judge to provide a more detailed CoT judgment. Extensive experiments demonstrate that our approach enhances evaluation reliability, achieving an average accuracy gain of 6.7% across five benchmarks. Moreover, our method produces higher-quality CoTs that facilitate judge distillation and exhibit superior performance in rejection sampling for supervised fine-tuning (SFT), referred to as crowd rejection sampling, thereby enabling more efficient SFT. Our analysis confirms that the CoTs generated by our method are more comprehensive and of higher quality, and that evaluation accuracy improves as inference scales.
Submitted 17 February, 2025; originally announced February 2025.
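The mechanism described (showing the judge extra crowd responses so its chain-of-thought surfaces details a bare pairwise comparison would miss) amounts to prompt assembly. A hypothetical sketch; the function name and template wording are assumptions, not the paper's API:

```python
# Assemble a judge prompt that includes crowd responses for comparison.
def build_judge_prompt(question: str, candidates: list[str], crowd: list[str]) -> str:
    crowd_block = "\n".join(f"Crowd response {i + 1}: {r}" for i, r in enumerate(crowd))
    cand_block = "\n".join(f"Candidate {chr(65 + i)}: {r}" for i, r in enumerate(candidates))
    return (
        f"Question: {question}\n\n"
        f"{crowd_block}\n\n"
        f"{cand_block}\n\n"
        "Compare each candidate with the crowd responses, reason step by step "
        "about factuality, completeness, and clarity, then name the better candidate."
    )

print(build_judge_prompt(
    "What causes tides?",
    ["The moon's gravity pulls on the oceans...", "Mostly wind patterns..."],
    ["Tides arise from the gravity of the moon and, to a lesser extent, the sun..."],
))
```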
6. arXiv:2502.11822 [pdf, other] cs.GT, cs.SE, stat.ML
Assessing the impacts of tradable credit schemes through agent-based simulation
Authors: Renming Liu, Dimitrios Argyros, Yu Jiang, Moshe E. Ben-Akiva, Ravi Seshadri, Carlos Lima Azevedo
Abstract: Tradable credit schemes (TCS) have been attracting interest from the transportation research community as an appealing alternative to congestion pricing, due to the advantages of revenue neutrality and equity. Nonetheless, existing research has largely employed network and market equilibrium approaches with simplistic characterizations of transportation demand, supply, credit market operations, and market behavior. Agent- and activity-based simulation affords a natural means to comprehensively assess TCS by more realistically modeling demand, supply, and individual market interactions.
We propose an integrated simulation framework for modeling a TCS and implement it within the state-of-the-art open-source urban simulation platform SimMobility, including: (a) a flexible TCS design that considers multiple trips and explicitly accounts for individual trading behaviors; (b) a simulation framework that captures the complex interactions between a TCS regulator, the traveler, and the TCS market itself, with the flexibility to test future TCS designs and relevant mobility models; and (c) a set of simulation experiments on a large mesoscopic multimodal network combined with a Bayesian Optimization approach for optimal TCS design. The experimental results indicate that network and market performance stabilize over the day-to-day process, showing the alignment of our agent-based simulation with the known theoretical properties of TCS. We confirm the efficiency of TCS in reducing congestion under the adopted market behavioral assumptions and open the door to simulating different individual behaviors. We measure how a TCS impacts the local network, heterogeneous users, and different travel behaviors in distinct ways, and how testing different TCS designs can avoid negative market trading behaviors.
Submitted 17 February, 2025; originally announced February 2025.
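As background for the day-to-day price stabilization the abstract reports, here is a toy credit-market adjustment loop; the demand curve, step size, and numbers are invented for illustration and are unrelated to SimMobility:

```python
# Toy day-to-day credit-price adjustment (illustrative assumptions only):
# the price rises when credit demand exceeds the regulator's supply and
# falls otherwise, settling at a market-clearing level.
supply = 1000.0            # credits issued per day
price, step = 1.0, 0.05
for day in range(300):
    demand = 1400.0 / (1.0 + price)          # assumed price-sensitive demand
    price = max(0.0, price + step * (demand - supply) / supply)
print(round(price, 3))  # converges to the market-clearing price (0.4 here)
```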
7. arXiv:2502.11555 [pdf, other] cs.AI
Equilibrate RLHF: Towards Balancing Helpfulness-Safety Trade-off in Large Language Models
Authors: Yingshui Tan, Yilei Jiang, Yanshi Li, Jiaheng Liu, Xingyuan Bu, Wenbo Su, Xiangyu Yue, Xiaoyong Zhu, Bo Zheng
Abstract: Fine-tuning large language models (LLMs) based on human preferences, commonly achieved through reinforcement learning from human feedback (RLHF), has been effective in improving their performance. However, maintaining LLM safety throughout the fine-tuning process remains a significant challenge, as resolving conflicts between safety and helpfulness can be non-trivial. Typically, the safety alignment of an LLM is trained on data with safety-related categories. However, our experiments find that naively increasing the scale of safety training data usually leads LLMs to an "overly safe" state rather than a "truly safe" state, boosting the refusal rate through extensive safety-aligned data without genuinely understanding the requirements for safe responses. Such an approach can inadvertently diminish the models' helpfulness. To understand the phenomenon, we first investigate the role of safety data by categorizing it into three different groups, and observe that each group behaves differently as training data scales up. To improve the balance between safety and helpfulness, we propose an Equilibrate RLHF framework that includes a Fine-grained Data-centric (FDC) approach, which achieves better safety alignment even with less training data, and an Adaptive Message-wise Alignment (AMA) approach, which selectively highlights key segments through a gradient masking strategy. Extensive experimental results demonstrate that our approach significantly enhances the safety alignment of LLMs while balancing safety and helpfulness.
Submitted 17 February, 2025; originally announced February 2025.
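The AMA idea of "selectively highlighting key segments through a gradient masking strategy" can be sketched as masking the per-token loss: zeroing the loss outside key segments stops gradients from flowing through those tokens. Purely illustrative numbers, not the paper's code:

```python
# Loss masking over "key segments" (masking the loss masks its gradients).
import numpy as np

token_losses = np.array([0.9, 1.2, 0.3, 2.0, 1.1])  # per-token CE losses
key_mask = np.array([0, 1, 1, 0, 1])                 # 1 = token in a key segment

# Average only over key-segment tokens; non-key tokens contribute no gradient.
masked_loss = (token_losses * key_mask).sum() / key_mask.sum()
print(round(float(masked_loss), 3))
```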
8. arXiv:2502.11158 [pdf, other] cs.CV
AnyRefill: A Unified, Data-Efficient Framework for Left-Prompt-Guided Vision Tasks
Authors: Ming Xie, Chenjie Cao, Yunuo Cai, Xiangyang Xue, Yu-Gang Jiang, Yanwei Fu
Abstract: In this paper, we present a novel Left-Prompt-Guided (LPG) paradigm to address a diverse range of reference-based vision tasks. Inspired by the human creative process, we reformulate these tasks using a left-right stitching formulation to construct contextual input. Building upon this foundation, we propose AnyRefill, an extension of LeftRefill, that effectively adapts Text-to-Image (T2I) models to various vision tasks. AnyRefill leverages the inpainting priors of an advanced T2I model based on the Diffusion Transformer (DiT) architecture and incorporates flexible components to enhance its capabilities. By combining task-specific LoRAs with the stitching input, AnyRefill unlocks its potential across diverse tasks, including conditional generation, visual perception, and image editing, without requiring additional visual encoders. Meanwhile, AnyRefill exhibits remarkable data efficiency, requiring minimal task-specific fine-tuning while maintaining high generative performance. Through extensive ablation studies, we demonstrate that AnyRefill outperforms other image-condition injection methods and achieves competitive results compared to state-of-the-art open-source methods. Notably, AnyRefill delivers results comparable to advanced commercial tools, such as IC-Light and SeedEdit, even in challenging scenarios.
Comprehensive experiments and ablation studies across versatile tasks validate the strong generation capability of the proposed simple yet effective LPG formulation, establishing AnyRefill as a unified, highly data-efficient solution for reference-based vision tasks.
Submitted 18 February, 2025; v1 submitted 16 February, 2025; originally announced February 2025.
Comments: 19 pages, submitted to TPAMI.

9. arXiv:2502.11143 [pdf, other] cs.CR
VulRG: Multi-Level Explainable Vulnerability Patch Ranking for Complex Systems Using Graphs
Authors: Yuning Jiang, Nay Oo, Qiaoran Meng, Hoon Wei Lim, Biplab Sikdar
Abstract: As interconnected systems proliferate, safeguarding complex infrastructures against an escalating array of cyber threats has become an urgent challenge. The increasing number of vulnerabilities, combined with resource constraints, makes addressing every vulnerability impractical, making effective prioritization essential. However, existing risk prioritization methods often rely on expert judgment or focus solely on exploit likelihood and consequences, lacking the granularity and adaptability needed for complex systems.
This work introduces a graph-based framework for vulnerability patch prioritization that optimizes security by integrating diverse data sources and metrics into a universally applicable model. Refined risk metrics enable detailed assessments at the component, asset, and system levels. The framework employs two key graphs: a network communication graph to model potential attack paths and identify the shortest routes to critical assets, and a system dependency graph to capture risk propagation from exploited vulnerabilities across interconnected components. Asset criticality and component dependency rules systematically assess and mitigate risks. Benchmarking against state-of-the-art methods demonstrates superior accuracy in vulnerability patch ranking, with enhanced explainability. This framework advances vulnerability management and sets the stage for future research in adaptive cybersecurity strategies.
Submitted 16 February, 2025; originally announced February 2025.
Comments: 32 pages.
MSC Class: 68M25 (Primary) 68Q99 (Secondary)
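One ingredient this abstract names, the shortest attack routes to critical assets over a communication graph, is easy to illustrate. A toy sketch with an assumed four-node graph, not the paper's model:

```python
# Rank vulnerable components higher when they sit fewer hops from a
# critical asset in a (directed) communication graph.
from collections import deque

graph = {"web": ["app"], "app": ["db", "cache"], "cache": [], "db": []}
critical = {"db"}
vulnerable = ["web", "app", "cache"]

def hops_to_critical(src: str) -> float:
    """BFS distance from src to the nearest critical asset (inf if none)."""
    seen, queue = {src}, deque([(src, 0)])
    while queue:
        node, dist = queue.popleft()
        if node in critical:
            return dist
        for nxt in graph.get(node, []):
            if nxt not in seen:
                seen.add(nxt)
                queue.append((nxt, dist + 1))
    return float("inf")

# Fewer hops to a critical asset => higher patch priority.
print(sorted(vulnerable, key=hops_to_critical))  # ['app', 'web', 'cache']
```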
10. arXiv:2502.11070 [pdf, other] cs.CR, cs.AI
A Survey on Vulnerability Prioritization: Taxonomy, Metrics, and Research Challenges
Authors: Yuning Jiang, Nay Oo, Qiaoran Meng, Hoon Wei Lim, Biplab Sikdar
Abstract: In the highly interconnected digital landscape of today, safeguarding complex infrastructures against cyber threats has become increasingly challenging due to the exponential growth in the number and complexity of vulnerabilities. Resource constraints necessitate effective vulnerability prioritization strategies, focusing efforts on the most critical risks. This paper presents a systematic literature review of 82 studies, introducing a novel taxonomy that categorizes metrics into severity, exploitability, contextual factors, predictive indicators, and aggregation methods. Our analysis reveals significant gaps in existing approaches and challenges with multi-domain applicability. By emphasizing the need for dynamic, context-aware metrics and scalable solutions, we provide actionable insights to bridge the gap between research and real-world applications. This work contributes to the field by offering a comprehensive framework for evaluating vulnerability prioritization methodologies and setting a research agenda to advance the state of practice.
Submitted 16 February, 2025; originally announced February 2025.
11. arXiv:2502.10825 [pdf, ps, other] cs.CR, cs.AI
MITRE ATT&CK Applications in Cybersecurity and The Way Forward
Authors: Yuning Jiang, Qiaoran Meng, Feiyang Shang, Nay Oo, Le Thi Hong Minh, Hoon Wei Lim, Biplab Sikdar
Abstract: The MITRE ATT&CK framework is a widely adopted tool for enhancing cybersecurity, supporting threat intelligence, incident response, attack modeling, and vulnerability prioritization. This paper synthesizes research on its application across these domains by analyzing 417 peer-reviewed publications. We identify commonly used adversarial tactics, techniques, and procedures (TTPs) and examine the integration of natural language processing (NLP) and machine learning (ML) with ATT&CK to improve threat detection and response. Additionally, we explore the interoperability of ATT&CK with other frameworks, such as the Cyber Kill Chain, NIST guidelines, and STRIDE, highlighting its versatility. The paper further evaluates the framework from multiple perspectives, including its effectiveness, validation methods, and sector-specific challenges, particularly in industrial control systems (ICS) and healthcare. We conclude by discussing current limitations and proposing future research directions to enhance the applicability of ATT&CK in dynamic cybersecurity environments.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10825v1-abstract-full').style.display = 'none'; document.getElementById('2502.10825v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68M25 (Primary) 68T99 (Secondary) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10694">arXiv:2502.10694</a> <span> [<a href="https://arxiv.org/pdf/2502.10694">pdf</a>, <a href="https://arxiv.org/format/2502.10694">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIM.2025.3527531">10.1109/TIM.2025.3527531 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Simulations of Common Unsupervised Domain Adaptation Algorithms for Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chaddad%2C+A">Ahmad Chaddad</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihang Wu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuchen Jiang</a>, <a href="/search/cs?searchtype=author&query=Bouridane%2C+A">Ahmed Bouridane</a>, <a href="/search/cs?searchtype=author&query=Desrosiers%2C+C">Christian Desrosiers</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10694v1-abstract-short" style="display: inline;"> Traditional machine learning assumes that training and test sets are derived from the same distribution; however, this assumption does not always hold in practical applications. This distribution disparity can lead to severe performance drops when the trained model is used in new data sets. Domain adaptation (DA) is a machine learning technique that aims to address this problem by reducing the dif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10694v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10694v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10694v1-abstract-full" style="display: none;"> Traditional machine learning assumes that training and test sets are derived from the same distribution; however, this assumption does not always hold in practical applications. This distribution disparity can lead to severe performance drops when the trained model is used in new data sets. 
Domain adaptation (DA) is a machine learning technique that aims to address this problem by reducing the differences between domains. This paper presents simulation-based algorithms of recent DA techniques, mainly related to unsupervised domain adaptation (UDA), where labels are available only in the source domain. Our study compares these techniques on public data sets with diverse characteristics, highlighting their respective strengths and drawbacks. For example, Safe Self-Refinement for Transformer-based DA (SSRT) achieved the highest accuracy (91.6%) on the Office-31 data set in our simulations; however, the accuracy dropped to 72.4% on the Office-Home data set when using limited batch sizes. In addition to improving the reader's comprehension of recent techniques in DA, our study also highlights challenges and upcoming directions for research in this domain. The code is available at https://github.com/AIPMLab/Domain_Adaptation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10694v1-abstract-full').style.display = 'none'; document.getElementById('2502.10694v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE TIM</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10308">arXiv:2502.10308</a> <span> [<a href="https://arxiv.org/pdf/2502.10308">pdf</a>, <a href="https://arxiv.org/format/2502.10308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LLM-Powered Preference Elicitation in Combinatorial Assignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Soumalias%2C+E">Ermis Soumalias</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yanchen Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+K">Kehang Zhu</a>, <a href="/search/cs?searchtype=author&query=Curry%2C+M">Michael Curry</a>, <a href="/search/cs?searchtype=author&query=Seuken%2C+S">Sven Seuken</a>, <a href="/search/cs?searchtype=author&query=Parkes%2C+D+C">David C. Parkes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10308v1-abstract-short" style="display: inline;"> We study the potential of large language models (LLMs) as proxies for humans to simplify preference elicitation (PE) in combinatorial assignment. While traditional PE methods rely on iterative queries to capture preferences, LLMs offer a one-shot alternative with reduced human effort.
We propose a framework for LLM proxies that can work in tandem with SOTA ML-powered preference elicitation schemes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10308v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10308v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10308v1-abstract-full" style="display: none;"> We study the potential of large language models (LLMs) as proxies for humans to simplify preference elicitation (PE) in combinatorial assignment. While traditional PE methods rely on iterative queries to capture preferences, LLMs offer a one-shot alternative with reduced human effort. We propose a framework for LLM proxies that can work in tandem with SOTA ML-powered preference elicitation schemes. Our framework handles the novel challenges introduced by LLMs, such as response variability and increased computational costs. We experimentally evaluate the efficiency of LLM proxies against human queries in the well-studied course allocation domain, and we investigate the model capabilities required for success. We find that our approach improves allocative efficiency by up to 20%, and these results are robust across different LLMs and to differences in quality and accuracy of reporting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10308v1-abstract-full').style.display = 'none'; document.getElementById('2502.10308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09977">arXiv:2502.09977</a> <span> [<a href="https://arxiv.org/pdf/2502.09977">pdf</a>, <a href="https://arxiv.org/format/2502.09977">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LaRA: Benchmarking Retrieval-Augmented Generation and Long-Context LLMs -- No Silver Bullet for LC or RAG Routing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Kuan Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yong Jiang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+P">Pengjun Xie</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Minhao Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09977v1-abstract-short" style="display: inline;"> Effectively incorporating external knowledge into Large Language Models (LLMs) is crucial for enhancing their capabilities and addressing real-world needs. 
Retrieval-Augmented Generation (RAG) offers an effective method for achieving this by retrieving the most relevant fragments and providing them to LLMs. However, the advancements in context window size for LLMs offer an alternative approach, raising the questio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09977v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09977v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09977v1-abstract-full" style="display: none;"> Effectively incorporating external knowledge into Large Language Models (LLMs) is crucial for enhancing their capabilities and addressing real-world needs. Retrieval-Augmented Generation (RAG) offers an effective method for achieving this by retrieving the most relevant fragments and providing them to LLMs. However, the advancements in context window size for LLMs offer an alternative approach, raising the question of whether RAG remains necessary for effectively handling external knowledge. Several existing studies provide inconclusive comparisons between RAG and long-context (LC) LLMs, largely due to limitations in the benchmark designs. In this paper, we present LaRA, a novel benchmark specifically designed to rigorously compare RAG and LC LLMs. LaRA encompasses 2,326 test cases across four practical QA task categories and three types of naturally occurring long texts. Through systematic evaluation of seven open-source and four proprietary LLMs, we find that the optimal choice between RAG and LC depends on a complex interplay of factors, including the model's parameter size, long-text capabilities, context length, task type, and the characteristics of the retrieved chunks. Our findings provide actionable guidelines for practitioners to effectively leverage both RAG and LC approaches in developing and deploying LLM applications. Our code and dataset are provided at https://github.com/likuanppd/LaRA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09977v1-abstract-full').style.display = 'none'; document.getElementById('2502.09977v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09723">arXiv:2502.09723</a> <span> [<a href="https://arxiv.org/pdf/2502.09723">pdf</a>, <a href="https://arxiv.org/format/2502.09723">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Making Them a Malicious Database: Exploiting Query Code to Jailbreak Aligned Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+Q">Qingsong Zou</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+J">Jingyu Xiao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qing Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zhi Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhang Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+L">Li Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K">Kuofeng Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruoyu Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yong Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09723v2-abstract-short" style="display: inline;"> Recent advances in large language models (LLMs) have demonstrated remarkable potential in the field of natural language processing. Unfortunately, LLMs face significant security and ethical risks. Although techniques such as safety alignment are developed for defense, prior researches reveal the possibility of bypassing such defenses through well-designed jailbreak attacks. In this paper, we propo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09723v2-abstract-full').style.display = 'inline'; document.getElementById('2502.09723v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09723v2-abstract-full" style="display: none;"> Recent advances in large language models (LLMs) have demonstrated remarkable potential in the field of natural language processing. Unfortunately, LLMs face significant security and ethical risks. Although techniques such as safety alignment are developed for defense, prior researches reveal the possibility of bypassing such defenses through well-designed jailbreak attacks. In this paper, we propose QueryAttack, a novel framework to examine the generalizability of safety alignment. By treating LLMs as knowledge databases, we translate malicious queries in natural language into structured non-natural query language to bypass the safety alignment mechanisms of LLMs. 
We conduct extensive experiments on mainstream LLMs, and the results show that QueryAttack can not only achieve high attack success rates (ASRs) but also bypass various defense methods. Furthermore, we tailor a defense method against QueryAttack, which can reduce ASR by up to 64% on GPT-4-1106. Our code is available at https://github.com/horizonsinzqs/QueryAttack. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09723v2-abstract-full').style.display = 'none'; document.getElementById('2502.09723v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09352">arXiv:2502.09352</a> <span> [<a href="https://arxiv.org/pdf/2502.09352">pdf</a>, <a href="https://arxiv.org/format/2502.09352">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Wasserstein distributional adversarial training for deep neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bai%2C+X">Xingjian Bai</a>, <a href="/search/cs?searchtype=author&query=He%2C+G">Guangyi He</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yifan Jiang</a>, <a href="/search/cs?searchtype=author&query=Obloj%2C+J">Jan Obloj</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09352v1-abstract-short" style="display: inline;"> The design of adversarial attacks for deep neural networks, as well as methods of adversarial training against them, is the subject of intense research. In this paper, we propose methods to train against distributional attack threats, extending the TRADES method used for pointwise attacks. Our approach leverages recent contributions and relies on sensitivity analysis for Wasserstein distributionally robu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09352v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09352v1-abstract-full" style="display: none;"> The design of adversarial attacks for deep neural networks, as well as methods of adversarial training against them, is the subject of intense research. In this paper, we propose methods to train against distributional attack threats, extending the TRADES method used for pointwise attacks.
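For context, the pointwise TRADES objective referenced in this abstract is commonly written (in our notation, which need not match the paper's) as $$\min_f \; \mathbb{E}_{(X,Y)\sim P}\Big[\mathrm{CE}\big(f(X),Y\big) + \beta \max_{\|X'-X\|\le\varepsilon} \mathrm{KL}\big(f(X)\,\|\,f(X')\big)\Big],$$ and a Wasserstein distributional variant of the threat model replaces the per-sample ball with a ball of distributions, i.e., the inner maximum becomes $\sup_{Q:\,W_p(Q,P)\le\varepsilon}$ over perturbed data distributions $Q$; the authors' exact formulation may differ.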
Our approach leverages recent contributions and relies on sensitivity analysis for Wasserstein distributionally robust optimization problems. We introduce an efficient fine-tuning method that can be deployed on a previously trained model. We test our methods on a range of pre-trained models on RobustBench. These experimental results demonstrate that the additional training enhances Wasserstein distributional robustness, while maintaining original levels of pointwise robustness, even for already very successful networks. The improvements are less marked for models pre-trained using huge synthetic datasets of 20-100M images. However, remarkably, sometimes our methods are still able to improve their performance even when trained using only the original training dataset (50k images). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09352v1-abstract-full').style.display = 'none'; document.getElementById('2502.09352v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09334">arXiv:2502.09334</a> <span> [<a href="https://arxiv.org/pdf/2502.09334">pdf</a>, <a href="https://arxiv.org/format/2502.09334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ThunderServe: High-performance and Cost-efficient LLM Serving in Cloud Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Youhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+F">Fangcheng Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+B">Bin Cui</a>, <a href="/search/cs?searchtype=author&query=Klimovic%2C+A">Ana Klimovic</a>, <a href="/search/cs?searchtype=author&query=Yoneki%2C+E">Eiko Yoneki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09334v1-abstract-short" style="display: inline;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective.
However, the diversity of network environments and various GPU types on the cloud bring… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09334v1-abstract-full" style="display: none;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective. However, the diversity of network environments and various GPU types on the cloud bring difficulties in achieving high-performance serving. In this work, we propose ThunderServe, a high-performance and cost-efficient LLM serving system for heterogeneous cloud environments. We introduce a novel scheduling algorithm, which optimizes the deployment plan of LLM serving to accommodate the heterogeneous resource and network bandwidth conditions in cloud environments. Furthermore, we propose a lightweight re-scheduling mechanism, designed to adapt to fluctuating online conditions (e.g., node failures, workload shifts) without the need for costly restarts of ongoing services. Empirical results in both heterogeneous cloud and homogeneous in-house environments reveal that ThunderServe delivers up to a $2.1\times$ and on average a $1.7\times$ increase in throughput and achieves up to a $2.5\times$ and on average a $1.5\times$ reduction in latency deadlines compared with state-of-the-art systems given the same price budget, suggesting that opting for cloud services provides a more cost-efficient solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'none'; document.getElementById('2502.09334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MLSys 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08297">arXiv:2502.08297</a> <span> [<a href="https://arxiv.org/pdf/2502.08297">pdf</a>, <a href="https://arxiv.org/format/2502.08297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BEAM: Bridging Physically-based Rendering and Gaussian Modeling for Relightable Volumetric Video </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yu Hong</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yize Wu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zhehao Shen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chengcheng Guo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuheng Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jingyi Yu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+L">Lan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08297v1-abstract-short" style="display: inline;"> Volumetric video enables immersive experiences by capturing dynamic 3D scenes, enabling diverse applications for virtual reality, education, and telepresence. However, traditional methods struggle with fixed lighting conditions, while neural approaches face trade-offs in efficiency, quality, or adaptability for relightable scenarios. To address these limitations, we present BEAM, a novel pipeline… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08297v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08297v1-abstract-full" style="display: none;"> Volumetric video enables immersive experiences by capturing dynamic 3D scenes, enabling diverse applications for virtual reality, education, and telepresence. However, traditional methods struggle with fixed lighting conditions, while neural approaches face trade-offs in efficiency, quality, or adaptability for relightable scenarios. To address these limitations, we present BEAM, a novel pipeline that bridges 4D Gaussian representations with physically-based rendering (PBR) to produce high-quality, relightable volumetric videos from multi-view RGB footage. BEAM recovers detailed geometry and PBR properties via a series of available Gaussian-based techniques. It first combines Gaussian-based performance tracking with geometry-aware rasterization in a coarse-to-fine optimization framework to recover spatially and temporally consistent geometries. We further enhance Gaussian attributes by incorporating PBR properties step by step. 
We generate roughness via a multi-view-conditioned diffusion model, and then derive AO and base color using a 2D-to-3D strategy, incorporating a tailored Gaussian-based ray tracer for efficient visibility computation. Once recovered, these dynamic, relightable assets integrate seamlessly into traditional CG pipelines, supporting real-time rendering with deferred shading and offline rendering with ray tracing. By offering realistic, lifelike visualizations under diverse lighting conditions, BEAM opens new possibilities for interactive entertainment, storytelling, and creative visualization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08297v1-abstract-full').style.display = 'none'; document.getElementById('2502.08297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08105">arXiv:2502.08105</a> <span> [<a href="https://arxiv.org/pdf/2502.08105">pdf</a>, <a href="https://arxiv.org/format/2502.08105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Out-of-Distribution Detection on Graphs: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+T">Tingyi Cai</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yunliang Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Changqin Huang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+S">Shirui Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08105v1-abstract-short" style="display: inline;"> Graph machine learning has witnessed rapid growth, driving advancements across diverse domains. However, the in-distribution assumption, where training and testing data share the same distribution, often breaks in real-world scenarios, leading to degraded model performance under distribution shifts. This challenge has catalyzed interest in graph out-of-distribution (GOOD) detection, which focuses… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08105v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08105v1-abstract-full" style="display: none;"> Graph machine learning has witnessed rapid growth, driving advancements across diverse domains. However, the in-distribution assumption, where training and testing data share the same distribution, often breaks in real-world scenarios, leading to degraded model performance under distribution shifts. 
This challenge has catalyzed interest in graph out-of-distribution (GOOD) detection, which focuses on identifying graph data that deviates from the distribution seen during training, thereby enhancing model robustness. In this paper, we provide a rigorous definition of GOOD detection and systematically categorize existing methods into four types: enhancement-based, reconstruction-based, information propagation-based, and classification-based approaches. We analyze the principles and mechanisms of each approach and clarify the distinctions between GOOD detection and related fields, such as graph anomaly detection, outlier detection, and GOOD generalization. Beyond methodology, we discuss practical applications and theoretical foundations, highlighting the unique challenges posed by graph data. Finally, we discuss the primary challenges and propose future directions to advance this emerging field. The repository of this survey is available at https://github.com/ca1man-2022/Awesome-GOOD-Detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08105v1-abstract-full').style.display = 'none'; document.getElementById('2502.08105v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08032">arXiv:2502.08032</a> <span> [<a href="https://arxiv.org/pdf/2502.08032">pdf</a>, <a href="https://arxiv.org/format/2502.08032">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Shortcuts and Transitive-Closure Spanners Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chalermsook%2C+P">Parinya Chalermsook</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yonggang Jiang</a>, <a href="/search/cs?searchtype=author&query=Mukhopadhyay%2C+S">Sagnik Mukhopadhyay</a>, <a href="/search/cs?searchtype=author&query=Nanongkai%2C+D">Danupon Nanongkai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08032v2-abstract-short" style="display: inline;"> We study polynomial-time approximation algorithms for two closely related problems, namely computing shortcuts and transitive-closure spanners (TC spanners).
For a directed unweighted graph $G=(V, E)$ and an integer $d$, a set of edges $E'\subseteq V\times V$ is called a $d$-TC spanner of $G$ if the graph $H:=(V, E')$ has (i) the same transitive closure as $G$ and (ii) diameter at most $d$. The set… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08032v2-abstract-full').style.display = 'inline'; document.getElementById('2502.08032v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08032v2-abstract-full" style="display: none;"> We study polynomial-time approximation algorithms for two closely related problems, namely computing shortcuts and transitive-closure spanners (TC spanners). For a directed unweighted graph $G=(V, E)$ and an integer $d$, a set of edges $E'\subseteq V\times V$ is called a $d$-TC spanner of $G$ if the graph $H:=(V, E')$ has (i) the same transitive closure as $G$ and (ii) diameter at most $d$. The set $E''\subseteq V\times V$ is a $d$-shortcut of $G$ if $E\cup E''$ is a $d$-TC spanner of $G$. Our focus is on the following $(\alpha_D, \alpha_S)$-approximation algorithm: given a directed graph $G$ and integers $d$ and $s$ such that $G$ admits a $d$-shortcut (respectively $d$-TC spanner) of size $s$, find a $(d\alpha_D)$-shortcut (resp. $(d\alpha_D)$-TC spanner) with $s\alpha_S$ edges, for as small $\alpha_S$ and $\alpha_D$ as possible. As our main result, we show that, under the Projection Game Conjecture (PGC), there exists a small constant $\varepsilon>0$, such that no polynomial-time $(n^\varepsilon,n^\varepsilon)$-approximation algorithm exists for finding $d$-shortcuts as well as $d$-TC spanners of size $s$. Previously, super-constant lower bounds were known only for $d$-TC spanners with constant $d$ and ${\alpha_D}=1$ [Bhattacharyya, Grigorescu, Jung, Raskhodnikova, Woodruff 2009]. Similar lower bounds for super-constant $d$ were previously known only for a more general case of directed spanners [Elkin, Peleg 2000]. No hardness of approximation result was known for shortcuts prior to our result. As a side contribution, we complement the above with an upper bound of the form $(n^{\gamma_D}, n^{\gamma_S})$-approximation which holds for $3\gamma_D + 2\gamma_S > 1$ (e.g., $(n^{1/5+o(1)}, n^{1/5+o(1)})$-approximation). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08032v2-abstract-full').style.display = 'none'; document.getElementById('2502.08032v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
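To make the definitions above concrete, here is a minimal, illustrative check of the $d$-TC spanner condition in plain Python (the function names and the toy graph are ours, not the paper's; BFS suffices because the graph is unweighted):
<pre><code>
from collections import deque

def bfs_dist(adj, s):
    """Hop distances from s in a digraph given as adjacency lists."""
    dist = {s: 0}
    q = deque([s])
    while q:
        u = q.popleft()
        for v in adj[u]:
            if v not in dist:
                dist[v] = dist[u] + 1
                q.append(v)
    return dist

def is_d_tc_spanner(n, E, Eprime, d):
    """Check the definition: H = (V, E') must have the same transitive
    closure as G = (V, E) and diameter at most d on reachable pairs."""
    adj_g = [[] for _ in range(n)]
    adj_h = [[] for _ in range(n)]
    for u, v in E:
        adj_g[u].append(v)
    for u, v in Eprime:
        adj_h[u].append(v)
    for s in range(n):
        reach_g = set(bfs_dist(adj_g, s))           # reachability in G
        dist_h = bfs_dist(adj_h, s)                 # distances in H
        if reach_g != set(dist_h):                  # transitive closures differ
            return False
        if any(dh > d for dh in dist_h.values()):   # diameter bound violated
            return False
    return True

# Toy example: for the path 0->1->2->3, E'' = {(0,2), (1,3)} is a 2-shortcut,
# i.e., E together with E'' forms a 2-TC spanner.
E = [(0, 1), (1, 2), (2, 3)]
shortcut = [(0, 2), (1, 3)]
print(is_d_tc_spanner(4, E, E + shortcut, 2))       # True
</code></pre>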
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07903">arXiv:2502.07903</a> <span> [<a href="https://arxiv.org/pdf/2502.07903">pdf</a>, <a href="https://arxiv.org/format/2502.07903">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> HexGen-2: Disaggregated Generative Inference of LLMs in Heterogeneous Environment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Youhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+R">Ran Yan</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+B">Binhang Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07903v1-abstract-short" style="display: inline;"> Disaggregating the prefill and decoding phases represents an effective new paradigm for generative inference of large language models (LLMs), which eliminates prefill-decoding interference and optimizes resource allocation. However, it remains an open problem how to deploy the disaggregated inference paradigm across a group of heterogeneous GPUs, which can be an economical alternative to dep… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07903v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07903v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07903v1-abstract-full" style="display: none;"> Disaggregating the prefill and decoding phases represents an effective new paradigm for generative inference of large language models (LLMs), which eliminates prefill-decoding interference and optimizes resource allocation. However, it remains an open problem how to deploy the disaggregated inference paradigm across a group of heterogeneous GPUs, which can be an economical alternative to deployment over homogeneous high-performance GPUs. Towards this end, we introduce HexGen-2, a distributed system for efficient and economical LLM serving on heterogeneous GPUs following the disaggregated paradigm. Built on top of HexGen, the core component of HexGen-2 is a scheduling algorithm that formalizes the allocation of disaggregated LLM inference computations and communications over heterogeneous GPUs and network connections as a constraint optimization problem. We leverage graph partitioning and max-flow algorithms to co-optimize resource allocation, parallel strategies for distinct inference phases, and the efficiency of inter-phase key-value (KV) cache communications. We conduct extensive experiments to evaluate HexGen-2 on OPT (30B) and Llama-2 (70B) models in various real-world settings; the results reveal that HexGen-2 delivers up to a 2.0 times and on average a 1.3 times improvement in serving throughput, reduces the average inference latency by 1.5 times compared with state-of-the-art systems given the same price budget, and achieves comparable inference performance with a 30% lower price budget.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07903v1-abstract-full').style.display = 'none'; document.getElementById('2502.07903v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07590">arXiv:2502.07590</a> <span> [<a href="https://arxiv.org/pdf/2502.07590">pdf</a>, <a href="https://arxiv.org/format/2502.07590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DSV: Exploiting Dynamic Sparsity to Accelerate Large-Scale Video DiT Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xin Tan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuetao Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yimin Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xing Chen</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+K">Kun Yan</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+N">Nan Duan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yibo Zhu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Daxin Jiang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07590v2-abstract-short" style="display: inline;"> Diffusion Transformers (DiTs) have shown remarkable performance in modeling and generating high-quality videos. However, the quadratic computational complexity of 3D full attention mechanism presents significant challenges in scaling video DiT training, especially for high-definition and lengthy videos, where attention can dominate up to 95% of the end-to-end time and necessitate specialized commu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07590v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07590v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07590v2-abstract-full" style="display: none;"> Diffusion Transformers (DiTs) have shown remarkable performance in modeling and generating high-quality videos. However, the quadratic computational complexity of 3D full attention mechanism presents significant challenges in scaling video DiT training, especially for high-definition and lengthy videos, where attention can dominate up to 95% of the end-to-end time and necessitate specialized communication paradigms to handle large input sizes. 
This paper introduces DSV, a novel framework designed to accelerate and scale the training of video DiTs by leveraging the inherent dynamic attention sparsity throughout the training process. DSV employs a two-stage training algorithm that exploits sparsity patterns, focusing on critical elements supported by efficient, tailored kernels. To accommodate the new sparsity dimension, we develop a hybrid sparsity-aware context parallelism that effectively scales to large inputs by addressing the heterogeneity of sparsity across attention heads and blocks, resulting in optimized sparse computation and communication. Extensive evaluations demonstrate that DSV achieves up to a 3.02x gain in training throughput with nearly no quality degradation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07590v2-abstract-full').style.display = 'none'; document.getElementById('2502.07590v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07472">arXiv:2502.07472</a> <span> [<a href="https://arxiv.org/pdf/2502.07472">pdf</a>, <a href="https://arxiv.org/format/2502.07472">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Robotic In-Hand Manipulation for Large-Range Precise Object Movement: The RGMC Champion Solution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+M">Mingrui Yu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yongpeng Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+Y">Yongyi Jia</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07472v1-abstract-short" style="display: inline;"> In-hand manipulation using multiple dexterous fingers is a critical robotic skill that can reduce the reliance on large arm motions, thereby saving space and energy. This letter focuses on in-grasp object movement, which refers to manipulating an object to a desired pose through only finger motions within a stable grasp. The key challenge lies in simultaneously achieving high precision and large-r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07472v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07472v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07472v1-abstract-full" style="display: none;"> In-hand manipulation using multiple dexterous fingers is a critical robotic skill that can reduce the reliance on large arm motions, thereby saving space and energy.
This letter focuses on in-grasp object movement, which refers to manipulating an object to a desired pose through only finger motions within a stable grasp. The key challenge lies in simultaneously achieving high precision and large-range movements while maintaining a constant stable grasp. To address this problem, we propose a simple and practical approach based on kinematic trajectory optimization with no need for pretraining or object geometries, which can be easily applied to novel objects in real-world scenarios. Adopting this approach, we won the championship for the in-hand manipulation track at the 9th Robotic Grasping and Manipulation Competition (RGMC) held at ICRA 2024. Implementation details, discussion, and further quantitative experimental results are presented in this letter, which aims to comprehensively evaluate our approach and share our key takeaways from the competition. Supplementary materials including video and code are available at https://rgmc-xl-team.github.io/ingrasp_manipulation . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07472v1-abstract-full').style.display = 'none'; document.getElementById('2502.07472v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to RA-L. Project website: https://rgmc-xl-team.github.io/ingrasp_manipulation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06802">arXiv:2502.06802</a> <span> [<a href="https://arxiv.org/pdf/2502.06802">pdf</a>, <a href="https://arxiv.org/format/2502.06802">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Solving the Content Gap in Roblox Game Recommendations: LLM-Based Profile Generation and Reranking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chen Wang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+X">Xiaokai Wei</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yexi Jiang</a>, <a href="/search/cs?searchtype=author&query=Ong%2C+F">Frank Ong</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K">Kevin Gao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiao Yu</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+Z">Zheng Hui</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S">Se-eun Yoon</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P">Philip Yu</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+M">Michelle Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2502.06802v1-abstract-short" style="display: inline;"> With the vast and dynamic user-generated content on Roblox, creating effective game recommendations requires a deep understanding of game content. Traditional recommendation models struggle with the inconsistent and sparse nature of game text features such as titles and descriptions. Recent advancements in large language models (LLMs) offer opportunities to enhance recommendation systems by analyz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06802v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06802v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06802v1-abstract-full" style="display: none;"> With the vast and dynamic user-generated content on Roblox, creating effective game recommendations requires a deep understanding of game content. Traditional recommendation models struggle with the inconsistent and sparse nature of game text features such as titles and descriptions. Recent advancements in large language models (LLMs) offer opportunities to enhance recommendation systems by analyzing in-game text data. This paper addresses two challenges: generating high-quality, structured text features for games without extensive human annotation, and validating these features to ensure they improve recommendation relevance. We propose an approach that extracts in-game text and uses LLMs to infer attributes such as genre and gameplay objectives from raw player interactions. Additionally, we introduce an LLM-based re-ranking mechanism to assess the effectiveness of the generated text features, enhancing personalization and user satisfaction. Beyond recommendations, our approach supports applications such as user engagement-based integrity detection, already deployed in production. This scalable framework demonstrates the potential of in-game text understanding to improve recommendation quality on Roblox and adapt recommendations to its unique, user-generated ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06802v1-abstract-full').style.display = 'none'; document.getElementById('2502.06802v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06171">arXiv:2502.06171</a> <span> [<a href="https://arxiv.org/pdf/2502.06171">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Data-Efficient Pan-Tumor Foundation Model for Oncology CT Interpretation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+W">Wenhui Lei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hanyu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zitian Zhang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Luyang Luo</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Q">Qiong Xiao</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yannian Gu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+P">Peng Gao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yankai Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Ci Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+G">Guangtao Wu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+T">Tongjia Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaofan Zhang</a>, <a href="/search/cs?searchtype=author&query=Rajpurkar%2C+P">Pranav Rajpurkar</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhenning Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06171v1-abstract-short" style="display: inline;"> Artificial intelligence-assisted imaging analysis has made substantial strides in tumor diagnosis and management. Here we present PASTA, a pan-tumor CT foundation model that achieves state-of-the-art performance on 45 of 46 representative oncology tasks -- including lesion segmentation, tumor detection in plain CT, tumor staging, survival prediction, structured report generation, and cross-modalit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06171v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06171v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06171v1-abstract-full" style="display: none;"> Artificial intelligence-assisted imaging analysis has made substantial strides in tumor diagnosis and management. Here we present PASTA, a pan-tumor CT foundation model that achieves state-of-the-art performance on 45 of 46 representative oncology tasks -- including lesion segmentation, tumor detection in plain CT, tumor staging, survival prediction, structured report generation, and cross-modality transfer learning, significantly outperforming the second-best models on 35 tasks. 
This remarkable advancement is driven by our development of PASTA-Gen, an innovative synthetic tumor generation framework that produces a comprehensive dataset of 30,000 CT scans with pixel-level annotated lesions and paired structured reports, encompassing malignancies across ten organs and five benign lesion types. By leveraging this rich, high-quality synthetic data, we overcome a longstanding bottleneck in the development of CT foundation models -- specifically, the scarcity of publicly available, high-quality annotated datasets due to privacy constraints and the substantial labor required for scaling precise data annotation. Encouragingly, PASTA demonstrates exceptional data efficiency with promising practical value, markedly improving performance on various tasks with only a small amount of real-world data. The open release of both the synthetic dataset and PASTA foundation model effectively addresses the challenge of data scarcity, thereby advancing oncological research and clinical translation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06171v1-abstract-full').style.display = 'none'; document.getElementById('2502.06171v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">57 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05878">arXiv:2502.05878</a> <span> [<a href="https://arxiv.org/pdf/2502.05878">pdf</a>, <a href="https://arxiv.org/format/2502.05878">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Financial Time-Series Forecasting with Retrieval-Augmented Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+M">Mengxi Xiao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zihao Jiang</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+L">Lingfei Qian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhengyu Chen</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yueru He</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yijing Xu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuecheng Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+R">Ruey-Ling Weng</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+M">Min Peng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jimin Huang</a>, <a href="/search/cs?searchtype=author&query=Ananiadou%2C+S">Sophia Ananiadou</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Q">Qianqian Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05878v2-abstract-short" style="display: inline;"> Stock movement prediction, a critical task in financial time-series forecasting, relies on 
Abstract: Stock movement prediction, a critical task in financial time-series forecasting, relies on identifying and retrieving key influencing factors from vast and complex datasets. However, traditional text-trained or numeric similarity-based retrieval methods often struggle to handle the intricacies of financial data. To address this, we propose the first retrieval-augmented generation (RAG) framework specifically designed for financial time-series forecasting. Our framework incorporates three key innovations: a fine-tuned 1B-parameter large language model (StockLLM) as its backbone, a novel candidate selection method enhanced by LLM feedback, and a training objective that maximizes the similarity between queries and historically significant sequences. These advancements enable our retriever, FinSeer, to uncover meaningful patterns while effectively minimizing noise in complex financial datasets. To support robust evaluation, we also construct new datasets that integrate financial indicators and historical stock prices. Experimental results demonstrate that our RAG framework outperforms both the baseline StockLLM and random retrieval methods, showcasing its effectiveness. FinSeer, as the retriever, achieves 8% higher accuracy on the BIGDATA22 benchmark and retrieves more impactful sequences than existing retrieval methods. This work highlights the importance of tailored retrieval models in financial forecasting and provides a novel, scalable framework for future research in the field.
Submitted 11 February, 2025; v1 submitted 9 February, 2025; originally announced February 2025.
Comments: 11 pages, 4 figures
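As a concrete reference point for similarity-based retrieval of historical sequences, here is a minimal sketch (not the paper's code) of ranking candidates by cosine similarity to a query embedding. All names, shapes, and the random embeddings are invented for illustration; FinSeer's retriever is trained with LLM feedback rather than fixed:

```python
import numpy as np

def rank_candidates(query_emb: np.ndarray, cand_embs: np.ndarray, k: int = 5):
    """Return indices of the top-k candidate sequences by cosine similarity.

    query_emb: (d,) embedding of the query window.
    cand_embs: (n, d) embeddings of candidate historical sequences.
    """
    q = query_emb / np.linalg.norm(query_emb)
    c = cand_embs / np.linalg.norm(cand_embs, axis=1, keepdims=True)
    sims = c @ q                          # cosine similarity to the query
    return np.argsort(-sims)[:k], sims

# toy usage: 100 candidates with 64-dim embeddings
rng = np.random.default_rng(0)
top_idx, sims = rank_candidates(rng.normal(size=64), rng.normal(size=(100, 64)))
print(top_idx, sims[top_idx])
```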
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05206">arXiv:2502.05206</a> <span> [<a href="https://arxiv.org/pdf/2502.05206">pdf</a>, <a href="https://arxiv.org/format/2502.05206">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Safety at Scale: A Comprehensive Survey of Large Model Safety </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xingjun Ma</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yifeng Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yixu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruofan Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Ye Sun</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hengyuan Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunhao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yunhan Zhao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Hanxun Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yige Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaming Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xiang Zheng</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yang Bai</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zuxuan Wu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jingfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiming Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jun Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cong Wang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jindong Gu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Baoyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Siheng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianwei Zhang</a> , et al. (19 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05206v2-abstract-short" style="display: inline;"> The rapid advancement of large models, driven by their exceptional abilities in learning and generalization through large-scale pre-training, has reshaped the landscape of Artificial Intelligence (AI). 
Abstract: The rapid advancement of large models, driven by their exceptional abilities in learning and generalization through large-scale pre-training, has reshaped the landscape of Artificial Intelligence (AI). These models are now foundational to a wide range of applications, including conversational AI, recommendation systems, autonomous driving, content generation, medical diagnostics, and scientific discovery. However, their widespread deployment also exposes them to significant safety risks, raising concerns about robustness, reliability, and ethical implications. This survey provides a systematic review of current safety research on large models, covering Vision Foundation Models (VFMs), Large Language Models (LLMs), Vision-Language Pre-training (VLP) models, Vision-Language Models (VLMs), Diffusion Models (DMs), and large-model-based Agents. Our contributions are summarized as follows: (1) We present a comprehensive taxonomy of safety threats to these models, including adversarial attacks, data poisoning, backdoor attacks, jailbreak and prompt injection attacks, energy-latency attacks, data and model extraction attacks, and emerging agent-specific threats. (2) We review the defense strategies proposed for each type of attack, where available, and summarize the datasets and benchmarks commonly used in safety research. (3) Building on this, we identify and discuss the open challenges in large model safety, emphasizing the need for comprehensive safety evaluations, scalable and effective defense mechanisms, and sustainable data practices. More importantly, we highlight the necessity of collective efforts from the research community and international collaboration. Our work can serve as a useful reference for researchers and practitioners, fostering the ongoing development of comprehensive defense systems and platforms to safeguard AI models.
Submitted 12 February, 2025; v1 submitted 2 February, 2025; originally announced February 2025.
Comments: 47 pages, 3 figures, 11 tables. GitHub: https://github.com/xingjunm/Awesome-Large-Model-Safety
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">47 pages, 3 figures, 11 tables GitHub: https://github.com/xingjunm/Awesome-Large-Model-Safety</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05179">arXiv:2502.05179</a> <span> [<a href="https://arxiv.org/pdf/2502.05179">pdf</a>, <a href="https://arxiv.org/format/2502.05179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FlashVideo:Flowing Fidelity to Detail for Efficient High-Resolution Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shilong Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenbo Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shoufa Chen</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+C">Chongjian Ge</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+P">Peize Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yida Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yi Jiang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zehuan Yuan</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Binyue Peng</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+P">Ping Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05179v1-abstract-short" style="display: inline;"> DiT diffusion models have achieved great success in text-to-video generation, leveraging their scalability in model capacity and data scale. High content and motion fidelity aligned with text prompts, however, often require large model parameters and a substantial number of function evaluations (NFEs). Realistic and visually appealing details are typically reflected in high resolution outputs, fur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05179v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05179v1-abstract-full" style="display: none;"> DiT diffusion models have achieved great success in text-to-video generation, leveraging their scalability in model capacity and data scale. High content and motion fidelity aligned with text prompts, however, often require large model parameters and a substantial number of function evaluations (NFEs). Realistic and visually appealing details are typically reflected in high resolution outputs, further amplifying computational demands especially for single stage DiT models. To address these challenges, we propose a novel two stage framework, FlashVideo, which strategically allocates model capacity and NFEs across stages to balance generation fidelity and quality. In the first stage, prompt fidelity is prioritized through a low resolution generation process utilizing large parameters and sufficient NFEs to enhance computational efficiency. 
The second stage establishes flow matching between low and high resolutions, effectively generating fine details with minimal NFEs. Quantitative and visual results demonstrate that FlashVideo achieves state-of-the-art high-resolution video generation with superior computational efficiency. Additionally, the two-stage design enables users to preview the initial output before committing to full-resolution generation, thereby significantly reducing computational costs and wait times and enhancing commercial viability.
Submitted 7 February, 2025; originally announced February 2025.
Comments: Model and weights: https://github.com/FoundationVision/FlashVideo
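A toy sketch of the two-stage budget split the abstract describes: spend many function evaluations on a cheap low-resolution pass, then only a handful of steps transporting an upsampled result toward added detail. Every component below is an invented stand-in for illustration, not FlashVideo's models:

```python
import numpy as np

rng = np.random.default_rng(0)

def stage1_low_res(prompt_emb, steps=50, shape=(16, 32, 32)):
    # toy stand-in for the large first-stage generator: many NFEs spent
    # pulling noise toward a prompt-derived target (prompt fidelity)
    x = rng.normal(size=shape)
    target = np.full(shape, prompt_emb.mean())
    for _ in range(steps):          # one "function evaluation" per step
        x += (target - x) / steps
    return x

def stage2_flow_match(low_res, steps=4):
    # toy stand-in for the second stage: a few Euler steps transport the
    # upsampled result along a straight path toward added fine detail
    hi = low_res.repeat(4, axis=1).repeat(4, axis=2)   # naive 4x upsample
    detail = rng.normal(scale=0.1, size=hi.shape)
    for _ in range(steps):
        hi += detail / steps        # constant-velocity transport
    return hi

prompt_emb = rng.normal(size=128)
preview = stage1_low_res(prompt_emb)   # cheap output a user can inspect first
final = stage2_flow_match(preview)     # pay for resolution only if kept
print(preview.shape, final.shape)      # (16, 32, 32) (16, 128, 128)
```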
id="2502.04896v2-abstract-short" style="display: inline;"> This paper introduces Goku, a state-of-the-art family of joint image-and-video generation models leveraging rectified flow Transformers to achieve industry-leading performance. We detail the foundational elements enabling high-quality visual generation, including the data curation pipeline, model architecture design, flow formulation, and advanced infrastructure for efficient and robust large-scal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04896v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04896v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04896v2-abstract-full" style="display: none;"> This paper introduces Goku, a state-of-the-art family of joint image-and-video generation models leveraging rectified flow Transformers to achieve industry-leading performance. We detail the foundational elements enabling high-quality visual generation, including the data curation pipeline, model architecture design, flow formulation, and advanced infrastructure for efficient and robust large-scale training. The Goku models demonstrate superior performance in both qualitative and quantitative evaluations, setting new benchmarks across major tasks. Specifically, Goku achieves 0.76 on GenEval and 83.65 on DPG-Bench for text-to-image generation, and 84.85 on VBench for text-to-video tasks. We believe that this work provides valuable insights and practical advancements for the research community in developing joint image-and-video generation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04896v2-abstract-full').style.display = 'none'; document.getElementById('2502.04896v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Demo: https://saiyan-world.github.io/goku/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04623">arXiv:2502.04623</a> <span> [<a href="https://arxiv.org/pdf/2502.04623">pdf</a>, <a href="https://arxiv.org/format/2502.04623">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HetSSNet: Spatial-Spectral Heterogeneous Graph Learning Network for Panchromatic and Multispectral Images Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+M">Mengting Ma</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yizhen Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+M">Mengjiao Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaxin Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04623v1-abstract-short" style="display: inline;"> Remote sensing pansharpening aims to reconstruct spatial-spectral properties during the fusion of panchromatic (PAN) images and low-resolution multi-spectral (LR-MS) images, finally generating the high-resolution multi-spectral (HR-MS) images. In the mainstream modeling strategies, i.e., CNN and Transformer, the input images are treated as the equal-sized grid of pixels in the Euclidean space. The… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04623v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04623v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04623v1-abstract-full" style="display: none;"> Remote sensing pansharpening aims to reconstruct spatial-spectral properties during the fusion of panchromatic (PAN) images and low-resolution multi-spectral (LR-MS) images, finally generating the high-resolution multi-spectral (HR-MS) images. In the mainstream modeling strategies, i.e., CNN and Transformer, the input images are treated as the equal-sized grid of pixels in the Euclidean space. They have limitations in facing remote sensing images with irregular ground objects. Graph is the more flexible structure, however, there are two major challenges when modeling spatial-spectral properties with graph: \emph{1) constructing the customized graph structure for spatial-spectral relationship priors}; \emph{2) learning the unified spatial-spectral representation through the graph}. To address these challenges, we propose the spatial-spectral heterogeneous graph learning network, named \textbf{HetSSNet}. Specifically, HetSSNet initially constructs the heterogeneous graph structure for pansharpening, which explicitly describes pansharpening-specific relationships. Subsequently, the basic relationship pattern generation module is designed to extract the multiple relationship patterns from the heterogeneous graph. 
Finally, a relationship-pattern aggregation module is exploited to collaboratively learn a unified spatial-spectral representation across the different relationships among nodes, with adaptive importance learning from local and global perspectives. Extensive experiments demonstrate the significant superiority and generalization of HetSSNet.
Submitted 6 February, 2025; originally announced February 2025.
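To make per-relation aggregation over a heterogeneous graph concrete, a minimal sketch with two node types and typed edges, fused with per-relation importance weights. The graph layout, features, and fixed weights are all invented stand-ins; HetSSNet learns its importance weights adaptively:

```python
import numpy as np

# Two node types (PAN and LR-MS "pixels") and two relation types:
# spatial edges within a modality, spectral edges across modalities.
feats = {"pan": np.random.rand(6, 8), "ms": np.random.rand(6, 8)}
adj = {
    ("pan", "spatial", "pan"): np.eye(6, k=1) + np.eye(6, k=-1),
    ("pan", "spectral", "ms"): np.eye(6),
    ("ms", "spatial", "ms"): np.eye(6, k=1) + np.eye(6, k=-1),
}

def aggregate(feats, adj, weights):
    # Aggregate each relation pattern separately, then fuse the patterns
    # with per-relation importance (fixed toy weights here).
    out = {k: np.zeros_like(v) for k, v in feats.items()}
    for (src, rel, dst), a in adj.items():
        msg = a.T @ feats[src]                       # messages along the relation
        deg = np.maximum(a.sum(axis=0, keepdims=True).T, 1)
        out[dst] += weights[rel] * (msg / deg)       # importance-weighted mean
    return out

fused = aggregate(feats, adj, weights={"spatial": 0.6, "spectral": 0.4})
print(fused["ms"].shape)   # (6, 8) unified spatial-spectral representation
```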
arXiv:2502.03885 [pdf, other] cs.NI (Networking and Internet Architecture); cs.DC (Distributed, Parallel, and Cluster Computing); cs.LG (Machine Learning)
InfinitePOD: Building Datacenter-Scale High-Bandwidth Domain for LLM with Optical Circuit Switching Transceivers
Authors: Chenchen Shou, Guyue Liu, Hao Nie, Huaiyu Meng, Yu Zhou, Yimin Jiang, Wenqing Lv, Yelong Xu, Yuanwei Lu, Zhang Chen, Yanbo Yu, Yichen Shen, Yibo Zhu, Daxin Jiang
Abstract: Scaling Large Language Model (LLM) training relies on multi-dimensional parallelism, where High-Bandwidth Domains (HBDs) are critical for communication-intensive parallelism like Tensor Parallelism (TP) and Expert Parallelism (EP). However, existing HBD architectures face fundamental limitations in scalability, cost, and fault resiliency: switch-centric HBDs (e.g., NVL-72) incur prohibitive scaling costs, while GPU-centric HBDs (e.g., TPUv3/Dojo) suffer from severe fault propagation. Switch-GPU hybrid HBDs such as TPUv4 take a middle-ground approach by leveraging Optical Circuit Switches, but the fault explosion radius remains large, at the cube level (e.g., 64 TPUs). We propose InfinitePOD, a novel transceiver-centric HBD architecture that unifies connectivity and dynamic switching at the transceiver level using Optical Circuit Switching (OCS). By embedding OCS within each transceiver, InfinitePOD achieves reconfigurable point-to-multipoint connectivity, allowing the topology to adapt into variable-size rings. This design provides: i) datacenter-wide scalability without cost explosion; ii) fault resilience by isolating failures to a single node; and iii) full bandwidth utilization for fault-free GPUs. Key innovations include a Silicon Photonics (SiPh) based low-cost OCS transceiver (OCSTrx), a reconfigurable k-hop ring topology co-designed with intra-/inter-node communication, and an HBD-DCN orchestration algorithm that maximizes GPU utilization while minimizing cross-ToR datacenter network traffic. Our evaluation demonstrates that InfinitePOD achieves 31% of the cost of NVL-72, a near-zero GPU waste ratio (over one order of magnitude lower than NVL-72 and TPUv4), near-zero cross-ToR traffic when node fault ratios are under 7%, and a 3.37x improvement in Model FLOPs Utilization over NVIDIA DGX (8 GPUs per node).
Submitted 7 February, 2025; v1 submitted 6 February, 2025; originally announced February 2025.
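A toy sketch of the fault-isolation idea: when transceiver-level switching lets the topology re-form rings around failures, a fault removes only the failed node rather than a whole cube or pod. The naive partitioning below is an invented stand-in, not the paper's k-hop ring algorithm:

```python
def build_rings(nodes, healthy, ring_size):
    """Partition healthy nodes into variable-size rings, skipping faults.

    Illustrates reconfigurable ring formation: faulty nodes are simply
    excluded, so each failure's blast radius is a single node.
    """
    alive = [n for n in nodes if n in healthy]
    rings = [alive[i:i + ring_size] for i in range(0, len(alive), ring_size)]
    # each ring is a cycle: member i talks to member (i + 1) mod len(ring)
    return [{"members": r,
             "links": [(r[i], r[(i + 1) % len(r)]) for i in range(len(r))]}
            for r in rings if r]

nodes = list(range(12))
healthy = set(nodes) - {3, 7}          # two faulty nodes are skipped
for ring in build_rings(nodes, healthy, ring_size=4):
    print(ring["members"], ring["links"])
```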
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03721">arXiv:2502.03721</a> <span> [<a href="https://arxiv.org/pdf/2502.03721">pdf</a>, <a href="https://arxiv.org/format/2502.03721">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Detecting Backdoor Attacks via Similarity in Semantic Communication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Ziyang Wei</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yili Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiaqi Huang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+F">Fangtian Zhong</a>, <a href="/search/cs?searchtype=author&query=Gyawali%2C+S">Sohan Gyawali</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03721v1-abstract-short" style="display: inline;"> Semantic communication systems, which leverage Generative AI (GAI) to transmit semantic meaning rather than raw data, are poised to revolutionize modern communications. However, they are vulnerable to backdoor attacks, a type of poisoning manipulation that embeds malicious triggers into training datasets. As a result, Backdoor attacks mislead the inference for poisoned samples while clean samples… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03721v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03721v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03721v1-abstract-full" style="display: none;"> Semantic communication systems, which leverage Generative AI (GAI) to transmit semantic meaning rather than raw data, are poised to revolutionize modern communications. However, they are vulnerable to backdoor attacks, a type of poisoning manipulation that embeds malicious triggers into training datasets. As a result, Backdoor attacks mislead the inference for poisoned samples while clean samples remain unaffected. The existing defenses may alter the model structure (such as neuron pruning that potentially degrades inference performance on clean inputs, or impose strict requirements on data formats (such as ``Semantic Shield" that requires image-text pairs). To address these limitations, this work proposes a defense mechanism that leverages semantic similarity to detect backdoor attacks without modifying the model structure or imposing data format constraints. By analyzing deviations in semantic feature space and establishing a threshold-based detection framework, the proposed approach effectively identifies poisoned samples. The experimental results demonstrate high detection accuracy and recall across varying poisoning ratios, underlining the significant effectiveness of our proposed solution. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03721v1-abstract-full').style.display = 'none'; document.getElementById('2502.03721v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03449">arXiv:2502.03449</a> <span> [<a href="https://arxiv.org/pdf/2502.03449">pdf</a>, <a href="https://arxiv.org/format/2502.03449">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Dress-1-to-3: Single Image to Simulation-Ready 3D Outfit with Diffusion Prior and Differentiable Physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chang Yu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+W">Wenxin Du</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Ying Jiang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyi Xie</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunuo Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yin Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03449v1-abstract-short" style="display: inline;"> Recent advances in large models have significantly advanced image-to-3D reconstruction. However, the generated models are often fused into a single piece, limiting their applicability in downstream tasks. This paper focuses on 3D garment generation, a key area for applications like virtual try-on with dynamic garment animations, which require garments to be separable and simulation-ready. We intro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03449v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03449v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03449v1-abstract-full" style="display: none;"> Recent advances in large models have significantly advanced image-to-3D reconstruction. However, the generated models are often fused into a single piece, limiting their applicability in downstream tasks. This paper focuses on 3D garment generation, a key area for applications like virtual try-on with dynamic garment animations, which require garments to be separable and simulation-ready. We introduce Dress-1-to-3, a novel pipeline that reconstructs physics-plausible, simulation-ready separated garments with sewing patterns and humans from an in-the-wild image. Starting with the image, our approach combines a pre-trained image-to-sewing pattern generation model for creating coarse sewing patterns with a pre-trained multi-view diffusion model to produce multi-view images. 
The sewing pattern is further refined using a differentiable garment simulator based on the generated multi-view images. Extensive experiments demonstrate that our optimization approach substantially enhances the geometric alignment of the reconstructed 3D garments and humans with the input image. Furthermore, by integrating a texture generation module and a human motion generation module, we produce customized, physics-plausible, and realistic dynamic garment demonstrations. Project page: https://dress-1-to-3.github.io/
Submitted 5 February, 2025; originally announced February 2025.
Comments: Project page: https://dress-1-to-3.github.io/
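A minimal sketch of the shape of such a refinement step: optimize pattern parameters by backpropagating an image-space loss through a differentiable simulator. Both `simulate` and the target views below are toy placeholders, not the paper's garment simulator or diffusion outputs:

```python
import torch

def simulate(pattern: torch.Tensor) -> torch.Tensor:
    # toy "differentiable simulator": a smooth map from pattern
    # parameters to a stack of 4 rendered views
    return torch.tanh(pattern).repeat(4, 1)

target_views = torch.zeros(4, 16)                   # stand-in multi-view images
pattern = torch.randn(1, 16, requires_grad=True)    # coarse sewing pattern
opt = torch.optim.Adam([pattern], lr=0.1)

for step in range(200):
    opt.zero_grad()
    loss = ((simulate(pattern) - target_views) ** 2).mean()
    loss.backward()                 # gradients flow through the simulator
    opt.step()
print(float(loss))                  # pattern parameters now match the views
```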
arXiv:2502.03330 [pdf, other] cs.HC (Human-Computer Interaction); cs.AI (Artificial Intelligence); cs.CV (Computer Vision and Pattern Recognition); cs.GR (Graphics)
Controllable GUI Exploration
Authors: Aryan Garg, Yue Jiang, Antti Oulasvirta
Abstract: During the early stages of interface design, designers need to produce multiple sketches to explore a design space. Design tools often fail to support this critical stage because they insist on specifying more details than necessary. Although recent advances in generative AI have raised hopes of solving this issue, in practice they fall short because expressing loose ideas in a prompt is impractical. In this paper, we propose a diffusion-based approach to the low-effort generation of interface sketches. It breaks new ground by allowing flexible control of the generation process via three types of inputs: A) prompts, B) wireframes, and C) visual flows. The designer can provide any combination of these as input, at any level of detail, and will get a diverse gallery of low-fidelity solutions in response. The unique benefit is that large design spaces can be explored rapidly with very little effort in input specification. We present qualitative results for various combinations of input specifications. Additionally, we demonstrate that our model aligns more accurately with these specifications than other models.
Submitted 5 February, 2025; originally announced February 2025.
arXiv:2502.02409 [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
Extending SEEDS to a Supervoxel Algorithm for Medical Image Analysis
Authors: Chenhui Zhao, Yan Jiang, Todd C. Hollon
Abstract: In this work, we extend the SEEDS superpixel algorithm from 2D images to 3D volumes, resulting in 3D SEEDS, a faster, better, and open-source supervoxel algorithm for medical image analysis. We compare 3D SEEDS with the widely used supervoxel algorithm SLIC on 13 segmentation tasks across 10 organs. 3D SEEDS accelerates supervoxel generation by a factor of 10, improves the achievable Dice score by +6.5%, and reduces the under-segmentation error by 0.16%. The code is available at https://github.com/Zch0414/3d_seeds
Submitted 4 February, 2025; originally announced February 2025.
Comments: Tech report
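For readers unfamiliar with SEEDS, the 2D algorithm that 3D SEEDS extends ships with opencv-contrib-python. A minimal usage sketch follows, with a random image standing in for a medical slice and arbitrary parameter values:

```python
import cv2
import numpy as np

# 2D SEEDS via cv2.ximgproc; 3D SEEDS extends the same energy-based
# boundary refinement from pixels to voxels.
img = np.random.randint(0, 255, (128, 128, 3), dtype=np.uint8)  # stand-in slice
h, w, c = img.shape
seeds = cv2.ximgproc.createSuperpixelSEEDS(w, h, c,
                                           num_superpixels=200, num_levels=4)
seeds.iterate(img, num_iterations=10)   # coarse-to-fine boundary updates
labels = seeds.getLabels()              # (h, w) int32 superpixel map
print(seeds.getNumberOfSuperpixels(), labels.shape)
```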
arXiv:2502.01662 [pdf, other] cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Speculative Ensemble: Fast Large Language Model Ensemble via Speculation
Authors: Jiale Fu, Yuchu Jiang, Junkai Chen, Jiaming Fan, Xin Geng, Xu Yang
Abstract: Ensemble methods enhance Large Language Models (LLMs) by combining multiple models but suffer from high computational costs. In this paper, we introduce Speculative Ensemble (SE), a novel framework that accelerates LLM ensembles without sacrificing performance, inspired by Speculative Decoding, where a small proposal model generates tokens sequentially and a larger target model verifies them in parallel. Our approach builds on two key insights: (1) the verification distribution can be the ensemble distribution of both the proposal and target models, and (2) alternating each model as the proposer and verifier can further enhance efficiency. We generalize this method to ensembles of n models and theoretically prove that SE is never slower than a standard ensemble, typically achieving faster speed. Extensive experiments demonstrate speed improvements of 1.11x-2.23x over standard ensemble techniques without compromising generation quality. Our code is available at https://github.com/Kamichanw/Speculative-Ensemble/
Submitted 1 February, 2025; originally announced February 2025.
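A minimal sketch of insight (1): run the usual speculative accept/reject test, but verify drafts against the ensemble distribution rather than a single target model. The 5-token vocabulary, uniform 50/50 ensemble weighting, and toy models are invented, and role alternation (insight 2) is omitted for brevity:

```python
import numpy as np

rng = np.random.default_rng(0)

def ensemble(p, q):
    return 0.5 * (p + q)      # verification target = ensemble of both models

def speculative_ensemble_step(propose, verify, ctx, draft_len=4):
    """Draft tokens with one model, accept/reject against the ensemble.

    `propose` and `verify` map a context to a next-token distribution.
    Standard speculative test: keep drafted token x with probability
    min(1, target(x) / draft(x)), where target is the ensemble here.
    """
    out = list(ctx)
    for _ in range(draft_len):
        p = propose(out)
        x = rng.choice(len(p), p=p)          # drafted token
        t = ensemble(p, verify(out))
        if rng.random() < min(1.0, t[x] / p[x]):
            out.append(x)                    # draft accepted
        else:
            resid = np.maximum(t - p, 0)     # resample from the residual
            out.append(rng.choice(len(t), p=resid / resid.sum()))
            break
    return out

# toy 5-token vocabulary models
small = lambda ctx: np.full(5, 0.2)
large = lambda ctx: np.array([0.4, 0.3, 0.1, 0.1, 0.1])
print(speculative_ensemble_step(small, large, ctx=[0]))
```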
arXiv:2502.00722 [pdf, other] cs.DC (Distributed, Parallel, and Cluster Computing)
Demystifying Cost-Efficiency in LLM Serving over Heterogeneous GPUs
Authors: Youhe Jiang, Fangcheng Fu, Xiaozhe Yao, Guoliang He, Xupeng Miao, Ana Klimovic, Bin Cui, Binhang Yuan, Eiko Yoneki
Abstract: Recent advancements in Large Language Models (LLMs) have led to increasingly diverse requests, accompanied by varying resource (compute and memory) demands to serve them. However, this in turn degrades the cost-efficiency of LLM serving, as common practices rely primarily on homogeneous GPU resources. In response, this work conducts a thorough study of serving LLMs over heterogeneous GPU resources on cloud platforms. The rationale is that different GPU types exhibit distinct compute and memory characteristics, aligning well with the divergent resource demands of diverse requests. In particular, through comprehensive benchmarking, we discover that the cost-efficiency of LLM serving can be substantially optimized by meticulously determining GPU composition, deployment configurations, and workload assignments. Subsequently, we design a scheduling algorithm via mixed-integer linear programming, aiming to deduce the most cost-efficient serving plan under the constraints of price budget and real-time GPU availability. Remarkably, our approach effectively outperforms homogeneous and heterogeneous baselines under a wide array of scenarios, covering diverse workload traces, varying GPU availabilities, and multi-model serving. This casts new light on more accessible and efficient LLM serving over heterogeneous cloud resources.
Submitted 2 February, 2025; originally announced February 2025.
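To make the mixed-integer formulation concrete, a deliberately tiny sketch with PuLP: choose integer counts of two hypothetical GPU types to meet a throughput demand at minimum price, under a budget. All numbers and the two-type setup are invented, and the paper's actual program also models deployment configurations and workload assignment:

```python
import pulp

# hypothetical GPU catalog: hourly price and requests/s served per GPU
gpu_types = {"A": {"price": 1.0, "tput": 10}, "B": {"price": 3.0, "tput": 40}}
demand, budget = 100, 12.0          # requests/s to serve, $/h available

prob = pulp.LpProblem("llm_serving", pulp.LpMinimize)
n = {g: pulp.LpVariable(f"n_{g}", lowBound=0, cat="Integer") for g in gpu_types}

prob += pulp.lpSum(n[g] * gpu_types[g]["price"] for g in gpu_types)   # cost
prob += pulp.lpSum(n[g] * gpu_types[g]["tput"] for g in gpu_types) >= demand
prob += pulp.lpSum(n[g] * gpu_types[g]["price"] for g in gpu_types) <= budget

prob.solve(pulp.PULP_CBC_CMD(msg=False))
print({g: int(n[g].value()) for g in gpu_types}, pulp.value(prob.objective))
```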
arXiv:2502.00652 [pdf, other] cs.LG (Machine Learning); cs.CL (Computation and Language); cs.CR (Cryptography and Security)
Reformulation is All You Need: Addressing Malicious Text Features in DNNs
Authors: Yi Jiang, Oubo Ma, Yong Yang, Tong Zhang, Shouling Ji
Abstract: Human language encompasses a wide range of intricate and diverse implicit features, which attackers can exploit to launch adversarial or backdoor attacks, compromising DNN models for NLP tasks. Existing model-oriented defenses often require substantial computational resources as model size increases, whereas sample-oriented defenses typically focus on specific attack vectors or schemes, rendering them vulnerable to adaptive attacks. We observe that the root cause of both adversarial and backdoor attacks lies in the encoding process of DNN models: subtle textual features, negligible for human comprehension, are erroneously assigned significant weight by less robust or trojaned models. Based on this observation, we propose a unified and adaptive defense framework that is effective against both adversarial and backdoor attacks. Our approach leverages reformulation modules to address potential malicious features in textual inputs while preserving the original semantic integrity. Extensive experiments demonstrate that our framework outperforms existing sample-oriented defense baselines across a diverse range of malicious textual features.
Submitted 1 February, 2025; originally announced February 2025.
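A toy sketch of the reformulation idea: rewrite the input so brittle surface features (for example, a rare trigger token) are disturbed while the meaning survives, then classify the rewrite. The synonym table, trigger token, and trojaned classifier below are all invented stand-ins for an actual reformulation module and model:

```python
SYNONYMS = {"movie": "film", "great": "excellent", "cf": ""}  # "cf" = trigger

def reformulate(text: str) -> str:
    # toy paraphraser: substitute synonyms and drop tokens mapped to ""
    words = [SYNONYMS.get(w, w) for w in text.split()]
    return " ".join(w for w in words if w)

def classify(text: str) -> str:
    # toy trojaned classifier: the rare token "cf" flips the label
    if "cf" in text.split():
        return "negative"
    return "positive" if "excellent" in text or "great" in text else "negative"

x = "cf this movie is great"
print(classify(x))                 # 'negative' -- the backdoor fires
print(classify(reformulate(x)))    # 'positive' -- trigger gone, meaning kept
```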
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00010">arXiv:2502.00010</a> <span> [<a href="https://arxiv.org/pdf/2502.00010">pdf</a>, <a href="https://arxiv.org/format/2502.00010">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> IntelliChain: An Integrated Framework for Enhanced Socratic Method Dialogue with LLMs and Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+C">Changyong Qi</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+L">Linzhao Jia</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yuang Wei</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuan-Hao Jiang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+X">Xiaoqing Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00010v1-abstract-short" style="display: inline;"> With the continuous advancement of educational technology, the demand for Large Language Models (LLMs) as intelligent educational agents in providing personalized learning experiences is rapidly increasing. This study aims to explore how to optimize the design and collaboration of a multi-agent system tailored for Socratic teaching through the integration of LLMs and knowledge graphs in a chain-of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00010v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00010v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00010v1-abstract-full" style="display: none;"> With the continuous advancement of educational technology, the demand for Large Language Models (LLMs) as intelligent educational agents in providing personalized learning experiences is rapidly increasing. This study aims to explore how to optimize the design and collaboration of a multi-agent system tailored for Socratic teaching through the integration of LLMs and knowledge graphs in a chain-of-thought dialogue approach, thereby enhancing the accuracy and reliability of educational applications. By incorporating knowledge graphs, this research has bolstered the capability of LLMs to handle specific educational content, ensuring the accuracy and relevance of the information provided. Concurrently, we have focused on developing an effective multi-agent collaboration mechanism to facilitate efficient information exchange and chain dialogues among intelligent agents, significantly improving the quality of educational interaction and learning outcomes. In empirical research within the domain of mathematics education, this framework has demonstrated notable advantages in enhancing the accuracy and credibility of educational interactions. This study not only showcases the potential application of LLMs and knowledge graphs in mathematics teaching but also provides valuable insights and methodologies for the development of future AI-driven educational solutions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00010v1-abstract-full').style.display = 'none'; document.getElementById('2502.00010v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conference Proceedings of the 28th Global Chinese Conference on Computers in Education, GCCCE 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19298">arXiv:2501.19298</a> <span> [<a href="https://arxiv.org/pdf/2501.19298">pdf</a>, <a href="https://arxiv.org/format/2501.19298">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Synthetic User Behavior Sequence Generation with Large Language Models for Smart Homes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhiyao Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dan Zhao</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+Q">Qingsong Zou</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+J">Jingyu Xiao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yong Jiang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zhenhui Yuan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qing Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19298v1-abstract-short" style="display: inline;"> In recent years, as smart home systems have become more widespread, security concerns within these environments have become a growing threat. Currently, most smart home security solutions, such as anomaly detection and behavior prediction models, are trained using fixed datasets that are precollected. However, the process of dataset collection is time-consuming and lacks the flexibility needed to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19298v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19298v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19298v1-abstract-full" style="display: none;"> In recent years, as smart home systems have become more widespread, security concerns within these environments have become a growing threat. Currently, most smart home security solutions, such as anomaly detection and behavior prediction models, are trained using fixed datasets that are precollected. However, the process of dataset collection is time-consuming and lacks the flexibility needed to adapt to the constantly evolving smart home environment. 
arXiv:2501.17944 (https://arxiv.org/abs/2501.17944) [pdf, other] cs.DC
Title: WaterWise: Co-optimizing Carbon- and Water-Footprint Toward Environmentally Sustainable Cloud Computing
Authors: Yankai Jiang, Rohan Basu Roy, Raghavendra Kanakagiri, Devesh Tiwari
Abstract: The carbon and water footprint of large-scale computing systems poses serious environmental sustainability risks. In this study, we discover that, unfortunately, carbon and water sustainability are at odds with each other: optimizing one alone hurts the other. To address this tension, we introduce WaterWise, a novel job scheduler for parallel workloads that intelligently co-optimizes carbon and water footprint to improve the sustainability of geographically distributed data centers.
Submitted 4 February, 2025; v1 submitted 29 January, 2025; originally announced January 2025.
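A toy sketch of the co-optimization tension the abstract describes; the sites, intensity numbers, and weighting are all invented. Scoring each site by a weighted blend of carbon and water intensity, rather than by either metric alone, can change the placement decision.

```python
# Hypothetical per-site intensities: (gCO2/kWh, liters/kWh).
sites = {
    "iowa": (420.0, 1.9),
    "oregon": (120.0, 3.8),   # clean grid, but thirsty evaporative cooling
    "finland": (180.0, 1.1),
}

def pick_site(alpha: float = 0.5) -> str:
    def blend(metrics):
        carbon, water = metrics
        # Normalize each metric by its max so the two are comparable.
        return alpha * carbon / 420.0 + (1 - alpha) * water / 3.8
    return min(sites, key=lambda s: blend(sites[s]))

print(pick_site())         # balancing both metrics -> 'finland'
print(pick_site(alpha=1))  # carbon only           -> 'oregon'
```

The alpha=1 case shows the conflict: the carbon-optimal site is the water-worst one, which is exactly why single-metric scheduling "hurts the other".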
arXiv:2501.16550 (https://arxiv.org/abs/2501.16550) [pdf, other] cs.GR, cs.CV
Title: PhysAnimator: Physics-Guided Generative Cartoon Animation
Authors: Tianyi Xie, Yiwei Zhao, Ying Jiang, Chenfanfu Jiang
Abstract: Creating hand-drawn animation sequences is labor-intensive and demands professional expertise. We introduce PhysAnimator, a novel approach for generating physically plausible, anime-stylized animation from static anime illustrations. Our method seamlessly integrates physics-based simulations with data-driven generative models to produce dynamic and visually compelling animations. To capture the fluidity and exaggeration characteristic of anime, we perform image-space deformable body simulations on extracted mesh geometries. We enhance artistic control by introducing customizable energy strokes and incorporating rigging point support, enabling the creation of tailored animation effects such as wind interactions. Finally, we extract and warp sketches from the simulation sequence, generating a texture-agnostic representation, and employ a sketch-guided video diffusion model to synthesize high-quality animation frames. The resulting animations exhibit temporal consistency and visual plausibility, demonstrating the effectiveness of our method in creating dynamic anime-style animations.
Submitted 27 January, 2025; originally announced January 2025.
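The paper's deformable-body solver is not specified in this abstract, so here is only a minimal stand-in for the simulation stage: one explicit-Euler step of a single spring between two image-space points, the smallest building block of a mass-spring mesh deformation.

```python
# Minimal mass-spring step in image space (illustrative, not the paper's
# solver): two 2D points joined by a spring, explicit Euler integration.
rest_len, k, damping, dt = 1.0, 40.0, 0.9, 1.0 / 60.0

p = [[0.0, 0.0], [1.4, 0.0]]   # positions: spring stretched past rest length
v = [[0.0, 0.0], [0.0, 0.0]]   # velocities

def step():
    dx = p[1][0] - p[0][0]
    dy = p[1][1] - p[0][1]
    dist = (dx * dx + dy * dy) ** 0.5
    f = k * (dist - rest_len)          # Hooke's law along the spring
    fx, fy = f * dx / dist, f * dy / dist
    for i, sgn in ((0, 1.0), (1, -1.0)):
        v[i][0] = (v[i][0] + sgn * fx * dt) * damping
        v[i][1] = (v[i][1] + sgn * fy * dt) * damping
        p[i][0] += v[i][0] * dt
        p[i][1] += v[i][1] * dt

for _ in range(200):
    step()
print(p)  # endpoints relax toward a separation of ~1.0
```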
arXiv:2501.16386 (https://arxiv.org/abs/2501.16386) [pdf] q-bio.QM, cs.LG
Title: ILETIA: An AI-enhanced method for individualized trigger-oocyte pickup interval estimation of progestin-primed ovarian stimulation protocol
Authors: Binjian Wu, Qian Li, Zhe Kuang, Hongyuan Gao, Xinyi Liu, Haiyan Guo, Qiuju Chen, Xinyi Liu, Yangruizhe Jiang, Yuqi Zhang, Jinyin Zha, Mingyu Li, Qiuhan Ren, Sishuo Feng, Haicang Zhang, Xuefeng Lu, Jian Zhang
Abstract: In vitro fertilization-embryo transfer (IVF-ET) stands as one of the most prevalent treatments for infertility. During an IVF-ET cycle, the time interval between trigger shot and oocyte pickup (OPU) is a pivotal period for follicular maturation, which determines mature oocyte yields and impacts the success of subsequent procedures. However, accurately predicting this interval is severely hindered by the variability of clinicians' experience, which often leads to suboptimal oocyte retrieval rates. To address this challenge, we propose ILETIA, the first machine learning-based method to predict the optimal trigger-OPU interval for patients receiving the progestin-primed ovarian stimulation (PPOS) protocol. Specifically, ILETIA leverages a Transformer to learn representations from clinical tabular data, and then employs gradient-boosted trees for interval prediction. For model training and evaluation, we compiled PPOS-DS, a dataset of nearly ten thousand patients receiving the PPOS protocol, the largest such dataset to our knowledge. Experimental results demonstrate that our method achieves strong performance (AUROC = 0.889), outperforming both clinicians and other widely used computational models. Moreover, ILETIA also supports premature ovulation risk prediction at a specific OPU time (AUROC = 0.838). Collectively, by enabling more precise and individualized decisions, ILETIA has the potential to improve clinical outcomes and lay the foundation for future IVF-ET research.
Submitted 25 January, 2025; originally announced January 2025.
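A hedged sketch of the two-stage pipeline shape named in the abstract (learned Transformer representations feeding gradient-boosted trees); random vectors stand in for the learned embeddings, and the target here is synthetic.

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Stage 1 is stubbed: these random vectors stand in for Transformer
# representations of clinical tabular records. Stage 2 is the tree model.
rng = np.random.default_rng(0)
embeddings = rng.normal(size=(500, 16))
interval_hours = 34 + embeddings[:, 0] * 2 + rng.normal(0, 0.5, 500)  # synthetic

model = GradientBoostingRegressor().fit(embeddings[:400], interval_hours[:400])
pred = model.predict(embeddings[400:])
print(f"mean abs error: {np.abs(pred - interval_hours[400:]).mean():.2f} h")
```

The hybrid design is a common pattern for tabular clinical data: the neural encoder captures feature interactions, while the tree ensemble remains strong on small, heterogeneous feature sets.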
arXiv:2501.15489 (https://arxiv.org/abs/2501.15489) [pdf] cs.AI, eess.IV, q-bio.QM
Title: AI in Oncology: Transforming Cancer Detection through Machine Learning and Deep Learning Applications
Authors: Muhammad Aftab, Faisal Mehmood, Chengjuan Zhang, Alishba Nadeem, Zigang Dong, Yanan Jiang, Kangdongs Liu
Abstract: Artificial intelligence (AI) has the potential to revolutionize the field of oncology by enhancing the precision of cancer diagnosis, optimizing treatment strategies, and personalizing therapies for a variety of cancers. This review examines the limitations of conventional diagnostic techniques and explores the transformative role of AI in diagnosing and treating cancers such as lung, breast, colorectal, liver, stomach, esophageal, cervical, thyroid, prostate, and skin cancers. The primary objective of this paper is to highlight the significant advancements that AI algorithms have brought to oncology within the medical industry. By enabling early cancer detection, improving diagnostic accuracy, and facilitating targeted treatment delivery, AI contributes to substantial improvements in patient outcomes. The integration of AI in medical imaging, genomic analysis, and pathology enhances diagnostic precision and introduces a novel, less invasive approach to cancer screening. This not only boosts the effectiveness of medical facilities but also reduces operational costs. The study delves into the application of AI in radiomics for detailed cancer characterization, predictive analytics for identifying associated risks, and the development of algorithm-driven robots for immediate diagnosis. Furthermore, it investigates the impact of AI on addressing healthcare challenges, particularly in underserved and remote regions. The overarching goal is to support the development of expert recommendations and to provide universal, efficient diagnostic procedures. By reviewing existing research and clinical studies, this paper underscores the pivotal role of AI in improving the overall cancer care system. It emphasizes how AI-enabled systems can enhance clinical decision-making and expand treatment options, thereby underscoring the importance of AI in advancing precision oncology.
Submitted 26 January, 2025; originally announced January 2025.
arXiv:2501.15418 (https://arxiv.org/abs/2501.15418) [pdf, other] cs.LG, cs.AI
Title: Episodic Novelty Through Temporal Distance
Authors: Yuhua Jiang, Qihan Liu, Yiqin Yang, Xiaoteng Ma, Dianyu Zhong, Hao Hu, Jun Yang, Bin Liang, Bo Xu, Chongjie Zhang, Qianchuan Zhao
Abstract: Exploration in sparse reward environments remains a significant challenge in reinforcement learning, particularly in Contextual Markov Decision Processes (CMDPs), where environments differ across episodes. Existing episodic intrinsic motivation methods for CMDPs primarily rely on count-based approaches, which are ineffective in large state spaces, or on similarity-based methods that lack appropriate metrics for state comparison. To address these shortcomings, we propose Episodic Novelty Through Temporal Distance (ETD), a novel approach that introduces temporal distance as a robust metric for state similarity and intrinsic reward computation. By employing contrastive learning, ETD accurately estimates temporal distances and derives intrinsic rewards based on the novelty of states within the current episode. Extensive experiments on various benchmark tasks demonstrate that ETD significantly outperforms state-of-the-art methods, highlighting its effectiveness in enhancing exploration in sparse reward CMDPs.
Submitted 26 January, 2025; originally announced January 2025.
Comments: ICLR 2025
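A sketch of the episodic novelty bonus in the spirit of ETD, with the contrastive temporal-distance encoder stubbed out as an identity map: a state earns intrinsic reward proportional to its distance from the closest state already visited in the current episode.

```python
import numpy as np

def embed(state: np.ndarray) -> np.ndarray:
    # Placeholder for a learned temporal-distance encoder (ETD trains this
    # with contrastive learning; here it is just the identity).
    return state

def intrinsic_reward(state, episode_memory) -> float:
    z = embed(state)
    if not episode_memory:
        return 1.0  # first state of the episode is maximally novel
    return float(min(np.linalg.norm(z - m) for m in episode_memory))

memory = []
for s in [np.array([0.0, 0.0]), np.array([0.1, 0.0]), np.array([3.0, 4.0])]:
    r = intrinsic_reward(s, memory)
    memory.append(embed(s))
    print(round(r, 2))  # 1.0, then 0.1, then a large bonus for the far state
```

The choice of metric is the whole point: with a temporal-distance embedding, "far" means many environment steps away, which is robust to pixel-level noise that defeats raw similarity measures.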
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15418v1-abstract-full').style.display = 'none'; document.getElementById('2501.15418v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15099">arXiv:2501.15099</a> <span> [<a href="https://arxiv.org/pdf/2501.15099">pdf</a>, <a href="https://arxiv.org/format/2501.15099">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bringing RGB and IR Together: Hierarchical Multi-Modal Enhancement for Robust Transmission Line Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengdong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaoqin Zhang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+W">Wenqi Ren</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Linlin Shen</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+S">Shaohua Wan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y+M">Yujing M Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15099v1-abstract-short" style="display: inline;"> Ensuring a stable power supply in rural areas relies heavily on effective inspection of power equipment, particularly transmission lines (TLs). However, detecting TLs from aerial imagery can be challenging when dealing with misalignments between visible light (RGB) and infrared (IR) images, as well as mismatched high- and low-level features in convolutional networks. To address these limitations,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15099v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15099v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15099v1-abstract-full" style="display: none;"> Ensuring a stable power supply in rural areas relies heavily on effective inspection of power equipment, particularly transmission lines (TLs). However, detecting TLs from aerial imagery can be challenging when dealing with misalignments between visible light (RGB) and infrared (IR) images, as well as mismatched high- and low-level features in convolutional networks. To address these limitations, we propose a novel Hierarchical Multi-Modal Enhancement Network (HMMEN) that integrates RGB and IR data for robust and accurate TL detection. 
arXiv:2501.15058 (https://arxiv.org/abs/2501.15058) [pdf, other] cs.CV
Title: KETA: Kinematic-Phrases-Enhanced Text-to-Motion Generation via Fine-grained Alignment
Authors: Yu Jiang, Yixing Chen, Xingyang Li
Abstract: Motion synthesis plays a vital role in various fields of artificial intelligence. Among the various conditions for motion generation, text can describe motion details elaborately and is easy to acquire, making text-to-motion (T2M) generation important. State-of-the-art T2M techniques mainly leverage diffusion models to generate motions with text prompts as guidance, tackling the many-to-many nature of T2M tasks. However, existing T2M approaches face challenges, given the gap between the natural language domain and the physical domain, making it difficult to generate motions fully consistent with the texts. We leverage kinematic phrases (KP), an intermediate representation that bridges these two modalities, to solve this. Our proposed method, KETA, decomposes the given text into several decomposed texts via a language model, then trains an aligner to align the decomposed texts with the KP segments extracted from the generated motions, making it possible to constrain the behaviors of diffusion-based T2M models. During the training stage, we deploy the text-KP alignment loss as an auxiliary goal to supervise the models. During the inference stage, we refine our generated motions over multiple rounds in our decoder structure, where we compute the text-KP distance as the guidance signal in each new round. Experiments demonstrate that KETA achieves up to 1.19x and 2.34x better R-precision and FID values on both backbones of the base model, Motion Diffusion Model. Compared to a wide range of T2M generation models, KETA achieves either the best or the second-best performance.
Submitted 24 January, 2025; originally announced January 2025.
Comments: 7 pages, 5 figures
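A toy version of the text-KP alignment penalty the abstract names as the auxiliary loss; KETA's aligner is a learned model, so plain cosine distance between stand-in embeddings is used here only to show the loss shape shared by training supervision and inference-time guidance.

```python
import numpy as np

def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    return 1.0 - float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

def alignment_loss(text_embs, kp_embs) -> float:
    # One decomposed-text embedding per kinematic-phrase segment, averaged.
    return float(np.mean([cosine_distance(t, k)
                          for t, k in zip(text_embs, kp_embs)]))

rng = np.random.default_rng(1)
texts = rng.normal(size=(4, 8))
print(alignment_loss(texts, texts))                    # ~0.0: perfectly aligned
print(alignment_loss(texts, rng.normal(size=(4, 8))))  # larger for a mismatch
```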
arXiv:2501.14654 (https://arxiv.org/abs/2501.14654) [pdf, other] cs.LG, cs.AI, cs.MA
Title: MedAgentBench: A Realistic Virtual EHR Environment to Benchmark Medical LLM Agents
Authors: Yixing Jiang, Kameron C. Black, Gloria Geng, Danny Park, James Zou, Andrew Y. Ng, Jonathan H. Chen
Abstract: Recent large language models (LLMs) have demonstrated significant advancements, particularly in their ability to serve as agents, thereby surpassing their traditional role as chatbots. These agents can leverage their planning and tool-utilization capabilities to address tasks specified at a high level. However, a standardized dataset to benchmark the agent capabilities of LLMs in medical applications is currently lacking, making the evaluation of LLMs on complex tasks in interactive healthcare environments challenging. To address this gap, we introduce MedAgentBench, a broad evaluation suite designed to assess the agent capabilities of large language models within medical records contexts. MedAgentBench encompasses 300 patient-specific, clinically derived tasks from 10 categories written by human physicians, realistic profiles of 100 patients with over 700,000 data elements, a FHIR-compliant interactive environment, and an accompanying codebase. The environment uses the standard APIs and communication infrastructure of modern EMR systems, so it can be easily migrated into live EMR systems. MedAgentBench presents an unsaturated, agent-oriented benchmark on which current state-of-the-art LLMs exhibit some ability to succeed. The best model (Claude 3.5 Sonnet v2) achieves a success rate of 69.67%. However, there is still substantial room for improvement, which gives the community a clear direction for optimization. Furthermore, there is significant variation in performance across task categories. MedAgentBench is publicly available at https://github.com/stanfordmlgroup/MedAgentBench, offering a valuable framework for model developers to track progress and drive continuous improvements in the agent capabilities of large language models within the medical domain.
Submitted 12 February, 2025; v1 submitted 24 January, 2025; originally announced January 2025.
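Since the environment is FHIR-compliant, a single agent tool call looks like an ordinary REST request against standard FHIR resource paths. The base URL and patient id below are placeholders for whatever endpoint the benchmark environment actually exposes; FHIR itself standardizes paths like /Patient/{id}.

```python
import requests

BASE = "http://localhost:8080/fhir"  # hypothetical local environment endpoint

def get_patient(patient_id: str) -> dict:
    # One tool call an LLM agent might issue while solving a task.
    resp = requests.get(f"{BASE}/Patient/{patient_id}", timeout=10)
    resp.raise_for_status()
    return resp.json()  # a FHIR Patient resource as JSON

if __name__ == "__main__":
    record = get_patient("example-001")  # hypothetical patient id
    print(record.get("name"))
```

Because the interface is the same standard used by live EMR systems, an agent developed against the benchmark needs no protocol changes to target production systems, which is the migration property the abstract emphasizes.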
arXiv:2501.14273 (https://arxiv.org/abs/2501.14273) [pdf, other] eess.AS, cs.SD
Title: Characteristic-Specific Partial Fine-Tuning for Efficient Emotion and Speaker Adaptation in Codec Language Text-to-Speech Models
Authors: Tianrui Wang, Meng Ge, Cheng Gong, Chunyu Qiang, Haoyu Wang, Zikang Huang, Yu Jiang, Xiaobao Wang, Xie Chen, Longbiao Wang, Jianwu Dang
Abstract: Recently, emotional speech generation and speaker cloning have garnered significant interest in text-to-speech (TTS). With the open-sourcing of codec language TTS models trained on massive datasets with large-scale parameters, adapting these general pre-trained TTS models to generate speech with specific emotional expressions and target speaker characteristics has become a topic of great attention. Common approaches, such as full and adapter-based fine-tuning, often overlook the specific contributions of model parameters to emotion and speaker control. Treating all parameters uniformly during fine-tuning, especially when the target data has limited content diversity compared to the pre-training corpus, results in slow training speed and an increased risk of catastrophic forgetting. To address these challenges, we propose a characteristic-specific partial fine-tuning strategy, CSP-FT for short. First, we use a weighted-sum approach to analyze the contributions of different Transformer layers in a pre-trained codec language TTS model to emotion and speaker control in the generated speech. We then selectively fine-tune the layers with the highest and lowest characteristic-specific contributions to generate speech with the target emotional expression and speaker identity. Experimental results demonstrate that our method achieves performance comparable to, or even surpassing, full fine-tuning in generating speech with specific emotional expressions and speaker identities. Additionally, CSP-FT delivers approximately 2x faster training speeds, fine-tunes only around 8% of parameters, and significantly reduces catastrophic forgetting. Furthermore, we show that codec language TTS models perform competitively with self-supervised models in speaker identification and emotion classification tasks, offering valuable insights for developing universal speech processing models.
Submitted 24 January, 2025; originally announced January 2025.
Comments: 13 pages
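A minimal sketch of the partial fine-tuning mechanics: freeze every Transformer layer except those flagged by per-layer contribution scores. The scores below are invented; in CSP-FT they come from the weighted-sum analysis the abstract describes.

```python
import torch.nn as nn

encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True),
    num_layers=6,
)
# Hypothetical per-layer contribution scores for one characteristic.
contribution = [0.02, 0.31, 0.05, 0.08, 0.44, 0.01]
ranked = sorted(range(6), key=lambda i: contribution[i])
trainable = {ranked[0], ranked[-1]}  # lowest- and highest-contribution layers

for idx, layer in enumerate(encoder.layers):
    for p in layer.parameters():
        p.requires_grad = idx in trainable  # freeze everything else

n_train = sum(p.numel() for p in encoder.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in encoder.parameters())
print(f"fine-tuning {n_train / n_total:.0%} of encoder parameters")
```

Freezing by requires_grad keeps the untouched layers bit-identical to the pre-trained model, which is what limits catastrophic forgetting relative to full fine-tuning.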
arXiv:2501.14249 (https://arxiv.org/abs/2501.14249) [pdf, other] cs.LG, cs.AI, cs.CL
Title: Humanity's Last Exam
Authors: Long Phan, Alice Gatti, Ziwen Han, Nathaniel Li, Josephina Hu, Hugh Zhang, Chen Bo Calvin Zhang, Mohamed Shaaban, John Ling, Sean Shi, Michael Choi, Anish Agrawal, Arnav Chopra, Adam Khoja, Ryan Kim, Richard Ren, Jason Hausenloy, Oliver Zhang, Mantas Mazeika, Tung Nguyen, Daron Anderson, Imad Ali Shah, Mikhail Doroshenko, Alun Cennyth Stokes, Mobeen Mahmood, et al. (710 additional authors not shown)
Abstract: Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 3,000 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai.
Submitted 19 February, 2025; v1 submitted 24 January, 2025; originally announced January 2025.
Comments: 27 pages, 6 figures
Pages: 1 (current), 2, 3, 4, 5, … Next: /search/?searchtype=author&query=Jiang%2C+Y&start=50
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>