CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <!-- MathJax is requested over explicit https, like every other static asset on this page --> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <!-- the image's alt text supplies the accessible name of this link; no aria-label, which would override it --> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 765 results for author: <span class="mathjax">Liu, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <!-- role="search" exposes this form as a search landmark to assistive technology --> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Liu%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Liu, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Liu%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Liu, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Liu%2C+G&start=50" class="pagination-next" >Next </a> <!-- only the link for the page being shown (class is-current) carries aria-current="page"; the other links are labelled "Goto page N" --> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15153">arXiv:2502.15153</a> <span> [<a href="https://arxiv.org/pdf/2502.15153">pdf</a>, <a 
href="https://arxiv.org/format/2502.15153">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Investigating the Adaptive Robustness with Knowledge Conflicts in LLM-based Multi-Agent Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ju%2C+T">Tianjie Ju</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bowen Wang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+H">Hao Fei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M">Mong-Li Lee</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+W">Wynne Hsu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yun Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qianren Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+P">Pengzhou Cheng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zongru Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gongshen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15153v1-abstract-short" style="display: inline;"> Recent advances in Large Language Models (LLMs) have upgraded them from sophisticated text generators to autonomous agents capable of corporation and tool use in multi-agent systems (MASs). However, the robustness of these LLM-based MASs, especially under knowledge conflicts, remains unclear. 
In this paper, we design four comprehensive metrics to investigate the robustness of MASs when facing mild… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15153v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15153v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15153v1-abstract-full" style="display: none;"> Recent advances in Large Language Models (LLMs) have upgraded them from sophisticated text generators to autonomous agents capable of corporation and tool use in multi-agent systems (MASs). However, the robustness of these LLM-based MASs, especially under knowledge conflicts, remains unclear. In this paper, we design four comprehensive metrics to investigate the robustness of MASs when facing mild or task-critical knowledge conflicts. We first analyze mild knowledge conflicts introduced by heterogeneous agents and find that they do not harm system robustness but instead improve collaborative decision-making. Next, we investigate task-critical knowledge conflicts by synthesizing knowledge conflicts and embedding them into one of the agents. Our results show that these conflicts have surprisingly little to no impact on MAS robustness. Furthermore, we observe that MASs demonstrate certain self-repairing capabilities by reducing their reliance on knowledge conflicts and adopting alternative solution paths to maintain stability. Finally, we conduct ablation studies on the knowledge conflict number, agent number, and interaction rounds, finding that the self-repairing capability of MASs has intrinsic limits, and all findings hold consistently across various factors. Our code is publicly available at https://github.com/wbw625/MultiAgentRobustness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15153v1-abstract-full').style.display = 'none'; document.getElementById('2502.15153v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Working in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15041">arXiv:2502.15041</a> <span> [<a href="https://arxiv.org/pdf/2502.15041">pdf</a>, <a href="https://arxiv.org/format/2502.15041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking Android Malware Detection: Rethinking the Role of Traditional and Deep Learning Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guojun Liu</a>, <a href="/search/cs?searchtype=author&query=Caragea%2C+D">Doina Caragea</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+X">Xinming Ou</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+S">Sankardas Roy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15041v1-abstract-short" style="display: inline;"> Android malware detection has been extensively studied using both traditional machine learning (ML) and deep learning (DL) approaches. 
While many state-of-the-art detection models, particularly those based on DL, claim superior performance, they often rely on limited comparisons, lacking comprehensive benchmarking against traditional ML models across diverse datasets. This raises concerns about th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15041v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15041v1-abstract-full" style="display: none;"> Android malware detection has been extensively studied using both traditional machine learning (ML) and deep learning (DL) approaches. While many state-of-the-art detection models, particularly those based on DL, claim superior performance, they often rely on limited comparisons, lacking comprehensive benchmarking against traditional ML models across diverse datasets. This raises concerns about the robustness of DL-based approaches' performance and the potential oversight of simpler, more efficient ML models. In this paper, we conduct a systematic evaluation of Android malware detection models across four datasets: three recently published, publicly available datasets and a large-scale dataset we systematically collected. We implement a range of traditional ML models, including Random Forests (RF) and CatBoost, alongside advanced DL models such as Capsule Graph Neural Networks (CapsGNN), BERT-based models, and ExcelFormer based models. Our results reveal that while advanced DL models can achieve strong performance, they are often compared against an insufficient number of traditional ML baselines. In many cases, simpler and more computationally efficient ML models achieve comparable or even superior performance. These findings highlight the need for rigorous benchmarking in Android malware detection research. 
We encourage future studies to conduct more comprehensive benchmarking comparisons between traditional and advanced models to ensure a more accurate assessment of detection capabilities. To facilitate further research, we provide access to our dataset, including app IDs, hash values, and labels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15041v1-abstract-full').style.display = 'none'; document.getElementById('2502.15041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14340">arXiv:2502.14340</a> <span> [<a href="https://arxiv.org/pdf/2502.14340">pdf</a>, <a href="https://arxiv.org/format/2502.14340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Earlier Tokens Contribute More: Learning Direct Preference Optimization From Temporal Decay Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+R">Ruichen Shao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bei Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gangao Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yang Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiang Zhou</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+J">Jingang Wang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xunliang Cai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14340v1-abstract-short" style="display: inline;"> Direct Preference Optimization (DPO) has gained attention as an efficient alternative to reinforcement learning from human feedback (RLHF) for aligning large language models (LLMs) with human preferences. Despite its advantages, DPO suffers from a length bias, generating responses longer than those from the reference model. Existing solutions like SimPO and SamPO address this issue but uniformly t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14340v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14340v1-abstract-full" style="display: none;"> Direct Preference Optimization (DPO) has gained attention as an efficient alternative to reinforcement learning from human feedback (RLHF) for aligning large language models (LLMs) with human preferences. Despite its advantages, DPO suffers from a length bias, generating responses longer than those from the reference model. Existing solutions like SimPO and SamPO address this issue but uniformly treat the contribution of rewards across sequences, overlooking temporal dynamics. To this end, we propose an enhanced preference optimization method that incorporates a temporal decay factor controlled by a gamma parameter. This dynamic weighting mechanism adjusts the influence of each reward based on its position in the sequence, prioritizing earlier tokens that are more critical for alignment. 
By adaptively focusing on more relevant feedback, our approach mitigates overfitting to less pertinent data and remains responsive to evolving human preferences. Experimental results on several benchmarks show that our approach consistently outperforms vanilla DPO by 5.9-8.8 points on AlpacaEval 2 and 3.3-9.7 points on Arena-Hard across different model architectures and sizes. Furthermore, additional experiments on mathematical and reasoning benchmarks (MMLU, GSM8K, and MATH) confirm that our method enhances performance without compromising general capabilities. Our codebase would be available at \url{https://github.com/LotuSrc/D2PO}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14340v1-abstract-full').style.display = 'none'; document.getElementById('2502.14340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14075">arXiv:2502.14075</a> <span> [<a href="https://arxiv.org/pdf/2502.14075">pdf</a>, <a href="https://arxiv.org/format/2502.14075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Vector Optimization on Low-Dimensional Vector Symbolic Architecture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Duan%2C+S">Shijin Duan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yejia Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Kompella%2C+R+R">Ramana Rao Kompella</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Shaolei Ren</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaolin Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14075v1-abstract-short" style="display: inline;"> Vector Symbolic Architecture (VSA) is emerging in machine learning due to its efficiency, but they are hindered by issues of hyperdimensionality and accuracy. As a promising mitigation, the Low-Dimensional Computing (LDC) method significantly reduces the vector dimension by ~100 times while maintaining accuracy, by employing a gradient-based optimization. 
Despite its potential, LDC optimization fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14075v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14075v1-abstract-full" style="display: none;"> Vector Symbolic Architecture (VSA) is emerging in machine learning due to its efficiency, but they are hindered by issues of hyperdimensionality and accuracy. As a promising mitigation, the Low-Dimensional Computing (LDC) method significantly reduces the vector dimension by ~100 times while maintaining accuracy, by employing a gradient-based optimization. Despite its potential, LDC optimization for VSA is still underexplored. Our investigation into vector updates underscores the importance of stable, adaptive dynamics in LDC training. We also reveal the overlooked yet critical roles of batch normalization (BN) and knowledge distillation (KD) in standard approaches. Besides the accuracy boost, BN does not add computational overhead during inference, and KD significantly enhances inference confidence. Through extensive experiments and ablation studies across multiple benchmarks, we provide a thorough evaluation of our approach and extend the interpretability of binary neural network optimization similar to LDC, previously unaddressed in BNN literature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14075v1-abstract-full').style.display = 'none'; document.getElementById('2502.14075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 2 figures. Accepted in CPAL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12659">arXiv:2502.12659</a> <span> [<a href="https://arxiv.org/pdf/2502.12659">pdf</a>, <a href="https://arxiv.org/format/2502.12659">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> The Hidden Risks of Large Reasoning Models: A Safety Assessment of R1 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+K">Kaiwen Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengzhi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xuandong Zhao</a>, <a href="/search/cs?searchtype=author&query=Jangam%2C+S">Shreedhar Jangam</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+J">Jayanth Srinivasa</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawn Song</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X+E">Xin Eric Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12659v1-abstract-short" style="display: inline;"> The rapid development of large reasoning models, such as OpenAI-o3 and DeepSeek-R1, has led to significant improvements in complex reasoning over non-reasoning large language models~(LLMs). 
However, their enhanced capabilities, combined with the open-source access of models like DeepSeek-R1, raise serious safety concerns, particularly regarding their potential for misuse. In this work, we present… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12659v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12659v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12659v1-abstract-full" style="display: none;"> The rapid development of large reasoning models, such as OpenAI-o3 and DeepSeek-R1, has led to significant improvements in complex reasoning over non-reasoning large language models~(LLMs). However, their enhanced capabilities, combined with the open-source access of models like DeepSeek-R1, raise serious safety concerns, particularly regarding their potential for misuse. In this work, we present a comprehensive safety assessment of these reasoning models, leveraging established safety benchmarks to evaluate their compliance with safety regulations. Furthermore, we investigate their susceptibility to adversarial attacks, such as jailbreaking and prompt injection, to assess their robustness in real-world applications. Through our multi-faceted analysis, we uncover four key findings: (1) There is a significant safety gap between the open-source R1 models and the o3-mini model, on both safety benchmark and attack, suggesting more safety effort on R1 is needed. (2) The distilled reasoning model shows poorer safety performance compared to its safety-aligned base models. (3) The stronger the model's reasoning ability, the greater the potential harm it may cause when answering unsafe questions. (4) The thinking process in R1 models pose greater safety concerns than their final answers. 
Our study provides insights into the security implications of reasoning models and highlights the need for further advancements in R1 models' safety to close the gap. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12659v1-abstract-full').style.display = 'none'; document.getElementById('2502.12659v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10807">arXiv:2502.10807</a> <span> [<a href="https://arxiv.org/pdf/2502.10807">pdf</a>, <a href="https://arxiv.org/format/2502.10807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> HybriDNA: A Hybrid Transformer-Mamba2 Long-Range DNA Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+M">Mingqian Ma</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+C">Chuan Cao</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&query=Dao%2C+T">Tri Dao</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+A">Albert Gu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhao 
Yang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+P">Pipi Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zun Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haiguang Liu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+T">Tao Qin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10807v2-abstract-short" style="display: inline;"> Advances in natural language processing and large language models have sparked growing interest in modeling DNA, often referred to as the "language of life". However, DNA modeling poses unique challenges. First, it requires the ability to process ultra-long DNA sequences while preserving single-nucleotide resolution, as individual nucleotides play a critical role in DNA function. Second, success i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10807v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10807v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10807v2-abstract-full" style="display: none;"> Advances in natural language processing and large language models have sparked growing interest in modeling DNA, often referred to as the "language of life". However, DNA modeling poses unique challenges. First, it requires the ability to process ultra-long DNA sequences while preserving single-nucleotide resolution, as individual nucleotides play a critical role in DNA function. 
Second, success in this domain requires excelling at both generative and understanding tasks: generative tasks hold potential for therapeutic and industrial applications, while understanding tasks provide crucial insights into biological mechanisms and diseases. To address these challenges, we propose HybriDNA, a decoder-only DNA language model that incorporates a hybrid Transformer-Mamba2 architecture, seamlessly integrating the strengths of attention mechanisms with selective state-space models. This hybrid design enables HybriDNA to efficiently process DNA sequences up to 131kb in length with single-nucleotide resolution. HybriDNA achieves state-of-the-art performance across 33 DNA understanding datasets curated from the BEND, GUE, and LRB benchmarks, and demonstrates exceptional capability in generating synthetic cis-regulatory elements (CREs) with desired properties. Furthermore, we show that HybriDNA adheres to expected scaling laws, with performance improving consistently as the model scales from 300M to 3B and 7B parameters. These findings underscore HybriDNA's versatility and its potential to advance DNA research and applications, paving the way for innovations in understanding and engineering the "language of life". <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10807v2-abstract-full').style.display = 'none'; document.getElementById('2502.10807v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://hybridna-project.github.io/HybriDNA-Project/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10641">arXiv:2502.10641</a> <span> [<a href="https://arxiv.org/pdf/2502.10641">pdf</a>, <a href="https://arxiv.org/format/2502.10641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Toward Equitable Access: Leveraging Crowdsourced Reviews to Investigate Public Perceptions of Health Resource Accessibility </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+Z">Zhaoqian Xue</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanhong Liu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+K">Kai Wei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingcheng Zeng</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Songhua Hu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lizhou Fan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingyao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10641v1-abstract-short" style="display: inline;"> Access to health resources is a critical determinant of public well-being and societal resilience, particularly during public health crises when demand for 
medical services and preventive care surges. However, disparities in accessibility persist across demographic and geographic groups, raising concerns about equity. Traditional survey methods often fall short due to limitations in coverage, cost… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10641v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10641v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10641v1-abstract-full" style="display: none;"> Access to health resources is a critical determinant of public well-being and societal resilience, particularly during public health crises when demand for medical services and preventive care surges. However, disparities in accessibility persist across demographic and geographic groups, raising concerns about equity. Traditional survey methods often fall short due to limitations in coverage, cost, and timeliness. This study leverages crowdsourced data from Google Maps reviews, applying advanced natural language processing techniques, specifically ModernBERT, to extract insights on public perceptions of health resource accessibility in the United States during the COVID-19 pandemic. Additionally, we employ Partial Least Squares regression to examine the relationship between accessibility perceptions and key socioeconomic and demographic factors including political affiliation, racial composition, and educational attainment. Our findings reveal that public perceptions of health resource accessibility varied significantly across the U.S., with disparities peaking during the pandemic and slightly easing post-crisis. Political affiliation, racial demographics, and education levels emerged as key factors shaping these perceptions. 
These findings underscore the need for targeted interventions and policy measures to address inequities, fostering a more inclusive healthcare infrastructure that can better withstand future public health challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10641v1-abstract-full').style.display = 'none'; document.getElementById('2502.10641v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10456">arXiv:2502.10456</a> <span> [<a href="https://arxiv.org/pdf/2502.10456">pdf</a>, <a href="https://arxiv.org/format/2502.10456">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Deep Reinforcement Learning-Based User Scheduling for Collaborative Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yandi Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guowei Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+L">Le Liang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+H">Hao Ye</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chongtao Guo</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2502.10456v1-abstract-short" style="display: inline;"> Stand-alone perception systems in autonomous driving suffer from limited sensing ranges and occlusions at extended distances, potentially resulting in catastrophic outcomes. To address this issue, collaborative perception is envisioned to improve perceptual accuracy by using vehicle-to-everything (V2X) communication to enable collaboration among connected and autonomous vehicles and roadside units… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10456v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10456v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10456v1-abstract-full" style="display: none;"> Stand-alone perception systems in autonomous driving suffer from limited sensing ranges and occlusions at extended distances, potentially resulting in catastrophic outcomes. To address this issue, collaborative perception is envisioned to improve perceptual accuracy by using vehicle-to-everything (V2X) communication to enable collaboration among connected and autonomous vehicles and roadside units. However, due to limited communication resources, it is impractical for all units to transmit sensing data such as point clouds or high-definition video. As a result, it is essential to optimize the scheduling of communication links to ensure efficient spectrum utilization for the exchange of perceptual data. In this work, we propose a deep reinforcement learning-based V2X user scheduling algorithm for collaborative perception. Given the challenges in acquiring perceptual labels, we reformulate the conventional label-dependent objective into a label-free goal, based on characteristics of 3D object detection. 
Incorporating both channel state information (CSI) and semantic information, we develop a double deep Q-Network (DDQN)-based user scheduling framework for collaborative perception, named SchedCP. Simulation results verify the effectiveness and robustness of SchedCP compared with traditional V2X scheduling methods. Finally, we present a case study to illustrate how our proposed algorithm adaptively modifies the scheduling decisions by taking both instantaneous CSI and perceptual semantics into account. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10456v1-abstract-full').style.display = 'none'; document.getElementById('2502.10456v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09672">arXiv:2502.09672</a> <span> [<a href="https://arxiv.org/pdf/2502.09672">pdf</a>, <a href="https://arxiv.org/format/2502.09672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> IMM-MOT: A Novel 3D Multi-object Tracking Framework with Interacting Multiple Model Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xulong Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gang Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zili Wu</a>, 
<a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+L">Lei Meng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09672v1-abstract-short" style="display: inline;"> 3D Multi-Object Tracking (MOT) provides the trajectories of surrounding objects, assisting robots or vehicles in smarter path planning and obstacle avoidance. Existing 3D MOT methods based on the Tracking-by-Detection framework typically use a single motion model to track an object throughout its entire tracking process. However, objects may change their motion patterns due to variations in the su… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09672v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09672v1-abstract-full" style="display: none;"> 3D Multi-Object Tracking (MOT) provides the trajectories of surrounding objects, assisting robots or vehicles in smarter path planning and obstacle avoidance. Existing 3D MOT methods based on the Tracking-by-Detection framework typically use a single motion model to track an object throughout its entire tracking process. However, objects may change their motion patterns due to variations in the surrounding environment. In this paper, we introduce the Interacting Multiple Model filter in IMM-MOT, which accurately fits the complex motion patterns of individual objects, overcoming the limitation of single-model tracking in existing approaches. 
In addition, we incorporate a Damping Window mechanism into the trajectory lifecycle management, leveraging the continuous association status of trajectories to control their creation and termination, reducing the occurrence of overlooked low-confidence true targets. Furthermore, we propose the Distance-Based Score Enhancement module, which enhances the differentiation between false positives and true positives by adjusting detection scores, thereby improving the effectiveness of the Score Filter. On the NuScenes Val dataset, IMM-MOT outperforms most other single-modal models using 3D point clouds, achieving an AMOTA of 73.8%. Our project is available at https://github.com/Ap01lo/IMM-MOT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09672v1-abstract-full').style.display = 'none'; document.getElementById('2502.09672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 65D19; 68T40 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09086">arXiv:2502.09086</a> <span> [<a href="https://arxiv.org/pdf/2502.09086">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid Model for Few-Shot Text Classification Using Transfer and Meta-Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jia Gao</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Shuangquan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guiran Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Binrong Zhu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Hongye Zheng</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+X">Xiaoxuan Liao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09086v1-abstract-short" style="display: inline;"> With the continuous development of natural language processing (NLP) technology, text classification tasks have been widely used in multiple application fields. However, obtaining labeled data is often expensive and difficult, especially in few-shot learning scenarios. To solve this problem, this paper proposes a few-shot text classification model based on transfer learning and meta-learning. 
The… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09086v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09086v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09086v1-abstract-full" style="display: none;"> With the continuous development of natural language processing (NLP) technology, text classification tasks have been widely used in multiple application fields. However, obtaining labeled data is often expensive and difficult, especially in few-shot learning scenarios. To solve this problem, this paper proposes a few-shot text classification model based on transfer learning and meta-learning. The model uses the knowledge of the pre-trained model for transfer and optimizes the model's rapid adaptability in few-sample tasks through a meta-learning mechanism. Through a series of comparative experiments and ablation experiments, we verified the effectiveness of the proposed method. The experimental results show that under the conditions of few samples and medium samples, the model based on transfer learning and meta-learning significantly outperforms traditional machine learning and deep learning methods. In addition, ablation experiments further analyzed the contribution of each component to the model performance and confirmed the key role of transfer learning and meta-learning in improving model accuracy. Finally, this paper discusses future research directions and looks forward to the potential of this method in practical applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09086v1-abstract-full').style.display = 'none'; document.getElementById('2502.09086v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09051">arXiv:2502.09051</a> <span> [<a href="https://arxiv.org/pdf/2502.09051">pdf</a>, <a href="https://arxiv.org/format/2502.09051">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> AIDE: Agentically Improve Visual Language Model with Domain Experts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chiu%2C+M">Ming-Chang Chiu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fuxiao Liu</a>, <a href="/search/cs?searchtype=author&query=Sapra%2C+K">Karan Sapra</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+A">Andrew Tao</a>, <a href="/search/cs?searchtype=author&query=Jacoob%2C+Y">Yaser Jacoob</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xuezhe Ma</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhiding Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guilin Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2502.09051v1-abstract-short" style="display: inline;"> The enhancement of Visual Language Models (VLMs) has traditionally relied on knowledge distillation from larger, more capable models. This dependence creates a fundamental bottleneck for improving state-of-the-art systems, particularly when no superior models exist. We introduce AIDE (Agentic Improvement through Domain Experts), a novel framework that enables VLMs to autonomously enhance their cap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09051v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09051v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09051v1-abstract-full" style="display: none;"> The enhancement of Visual Language Models (VLMs) has traditionally relied on knowledge distillation from larger, more capable models. This dependence creates a fundamental bottleneck for improving state-of-the-art systems, particularly when no superior models exist. We introduce AIDE (Agentic Improvement through Domain Experts), a novel framework that enables VLMs to autonomously enhance their capabilities by leveraging specialized domain expert models. AIDE operates through a four-stage process: (1) identifying instances for refinement, (2) engaging domain experts for targeted analysis, (3) synthesizing expert outputs with existing data, and (4) integrating enhanced instances into the training pipeline. Experiments on multiple benchmarks, including MMMU, MME, MMBench, etc., demonstrate AIDE's ability to achieve notable performance gains without relying on larger VLMs or human supervision. Our framework provides a scalable, resource-efficient approach to continuous VLM improvement, addressing critical limitations in current methodologies, particularly valuable when larger models are unavailable. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09051v1-abstract-full').style.display = 'none'; document.getElementById('2502.09051v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09038">arXiv:2502.09038</a> <span> [<a href="https://arxiv.org/pdf/2502.09038">pdf</a>, <a href="https://arxiv.org/format/2502.09038">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AoI-Sensitive Data Forwarding with Distributed Beamforming in UAV-Assisted IoT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lang%2C+Z">Zifan Lang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guixia Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+V+C+M">Victor C. M. 
Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09038v1-abstract-short" style="display: inline;"> This paper proposes a UAV-assisted forwarding system based on distributed beamforming to enhance age of information (AoI) in Internet of Things (IoT). Specifically, UAVs collect and relay data between sensor nodes (SNs) and the remote base station (BS). However, flight delays increase the AoI and degrade the network performance. To mitigate this, we adopt distributed beamforming to extend the comm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09038v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09038v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09038v1-abstract-full" style="display: none;"> This paper proposes a UAV-assisted forwarding system based on distributed beamforming to enhance age of information (AoI) in Internet of Things (IoT). Specifically, UAVs collect and relay data between sensor nodes (SNs) and the remote base station (BS). However, flight delays increase the AoI and degrade the network performance. To mitigate this, we adopt distributed beamforming to extend the communication range, reduce the flight frequency and ensure the continuous data relay and efficient energy utilization. Then, we formulate an optimization problem to minimize AoI and UAV energy consumption, by jointly optimizing the UAV trajectories and communication schedules. The problem is non-convex and highly dynamic, and thus we propose a deep reinforcement learning (DRL)-based algorithm to solve the problem, thereby enhancing the stability and accelerating the convergence speed. Simulation results show that the proposed algorithm effectively addresses the problem and outperforms other benchmark algorithms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09038v1-abstract-full').style.display = 'none'; document.getElementById('2502.09038v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures, ICC2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08808">arXiv:2502.08808</a> <span> [<a href="https://arxiv.org/pdf/2502.08808">pdf</a>, <a href="https://arxiv.org/format/2502.08808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A First-order Generative Bilevel Optimization Framework for Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+Q">Quan Xiao</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+H">Hui Yuan</a>, <a href="/search/cs?searchtype=author&query=Saif%2C+A+F+M">A F M Saif</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Kompella%2C+R">Ramana Kompella</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianyi Chen</a> </p> 
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08808v1-abstract-short" style="display: inline;"> Diffusion models, which iteratively denoise data samples to synthesize high-quality outputs, have achieved empirical success across domains. However, optimizing these models for downstream tasks often involves nested bilevel structures, such as tuning hyperparameters for fine-tuning tasks or noise schedules in training dynamics, where traditional bilevel methods fail due to the infinite-dimensiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08808v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08808v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08808v1-abstract-full" style="display: none;"> Diffusion models, which iteratively denoise data samples to synthesize high-quality outputs, have achieved empirical success across domains. However, optimizing these models for downstream tasks often involves nested bilevel structures, such as tuning hyperparameters for fine-tuning tasks or noise schedules in training dynamics, where traditional bilevel methods fail due to the infinite-dimensional probability space and prohibitive sampling costs. We formalize this challenge as a generative bilevel optimization problem and address two key scenarios: (1) fine-tuning pre-trained models via an inference-only lower-level solver paired with a sample-efficient gradient estimator for the upper level, and (2) training diffusion models from scratch with noise schedule optimization by reparameterizing the lower-level problem and designing a computationally tractable gradient estimator. 
Our first-order bilevel framework overcomes the incompatibility of conventional bilevel methods with diffusion processes, offering theoretical grounding and computational practicality. Experiments demonstrate that our method outperforms existing fine-tuning and hyperparameter search baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08808v1-abstract-full').style.display = 'none'; document.getElementById('2502.08808v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08621">arXiv:2502.08621</a> <span> [<a href="https://arxiv.org/pdf/2502.08621">pdf</a>, <a href="https://arxiv.org/format/2502.08621">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> SportsBuddy: Designing and Evaluating an AI-Powered Sports Video Storytelling Tool Through Real-World Deployment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+T">Tica Lin</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+R">Ruxun Xiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gardenia Liu</a>, <a href="/search/cs?searchtype=author&query=Tiwari%2C+D">Divyanshu Tiwari</a>, <a href="/search/cs?searchtype=author&query=Chiang%2C+M">Meng-Chia Chiang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+C">Chenjiayi Ye</a>, <a href="/search/cs?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a 
href="/search/cs?searchtype=author&query=Zhu-Tian%2C+C">Chen Zhu-Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08621v2-abstract-short" style="display: inline;"> Video storytelling is essential for sports performance analysis and fan engagement, enabling sports professionals and fans to effectively communicate and interpret the spatial and temporal dynamics of gameplay. Traditional methods rely on manual annotation and verbal explanations, placing significant demands on creators for video editing skills and on viewers for cognitive focus. However, these ap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08621v2-abstract-full').style.display = 'inline'; document.getElementById('2502.08621v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08621v2-abstract-full" style="display: none;"> Video storytelling is essential for sports performance analysis and fan engagement, enabling sports professionals and fans to effectively communicate and interpret the spatial and temporal dynamics of gameplay. Traditional methods rely on manual annotation and verbal explanations, placing significant demands on creators for video editing skills and on viewers for cognitive focus. However, these approaches are time-consuming and often struggle to accommodate individual needs. SportsBuddy addresses this gap with an intuitive, interactive video authoring tool. It combines player tracking, embedded interaction design, and timeline visualizations to seamlessly integrate narratives and visual cues within game contexts. This empowers users to effortlessly create context-driven video stories. 
Since its launch, over 150 sports users, including coaches, athletes, content creators, parents and fans, have utilized SportsBuddy to produce compelling game highlights for diverse use cases. User feedback highlights its accessibility and ease of use, making video storytelling and insight communication more attainable for diverse audiences. Case studies with collegiate teams and sports creators further demonstrate SportsBuddy's impact on enhancing coaching communication, game analysis, and fan engagement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08621v2-abstract-full').style.display = 'none'; document.getElementById('2502.08621v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at PacificVIS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07527">arXiv:2502.07527</a> <span> [<a href="https://arxiv.org/pdf/2502.07527">pdf</a>, <a href="https://arxiv.org/format/2502.07527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NatureLM: Deciphering the Language of Nature for Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+S">Shufang Xie</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Liang He</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+C">Chuan Cao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zequn Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zekun Guo</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yeqi Bai</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&query=Min%2C+Y">Yaosen Min</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Ziheng Lu</a>, <a 
href="/search/cs?searchtype=author&query=Hao%2C+H">Hongxia Hao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Han Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jielan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jia Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jianwei Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kehan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K">Kaiyuan Gao</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+Q">Qizhi Pei</a> , et al. (20 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07527v1-abstract-short" style="display: inline;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07527v1-abstract-full" style="display: none;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. 
Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typically trained in isolation, lacking the ability to integrate across different scientific domains. Recognizing that entities within these domains can all be represented as sequences, which together form the "language of nature", we introduce Nature Language Model (briefly, NatureLM), a sequence-based science foundation model designed for scientific discovery. Pre-trained with data from multiple scientific domains, NatureLM offers a unified, versatile model that enables various applications including: (i) generating and optimizing small molecules, proteins, RNA, and materials using text instructions; (ii) cross-domain generation/design, such as protein-to-molecule and protein-to-RNA generation; and (iii) achieving state-of-the-art performance in tasks like SMILES-to-IUPAC translation and retrosynthesis on USPTO-50k. NatureLM offers a promising generalist approach for various scientific tasks, including drug discovery (hit generation/optimization, ADMET optimization, synthesis), novel material design, and the development of therapeutic proteins or nucleotides. We have developed NatureLM models in different sizes (1 billion, 8 billion, and 46.7 billion parameters) and observed a clear improvement in performance as the model size increases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'none'; document.getElementById('2502.07527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">81 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07161">arXiv:2502.07161</a> <span> [<a href="https://arxiv.org/pdf/2502.07161">pdf</a>, <a href="https://arxiv.org/format/2502.07161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Mamba Architecture for Vision Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ibrahim%2C+F">Fady Ibrahim</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangjun Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guanghui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07161v1-abstract-short" style="display: inline;"> Transformers have become foundational for visual tasks such as object detection, semantic segmentation, and video understanding, but their quadratic complexity in attention mechanisms presents scalability challenges. To address these limitations, the Mamba architecture utilizes state-space models (SSMs) for linear scalability, efficient processing, and improved contextual awareness. 
This paper inv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07161v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07161v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07161v1-abstract-full" style="display: none;"> Transformers have become foundational for visual tasks such as object detection, semantic segmentation, and video understanding, but their quadratic complexity in attention mechanisms presents scalability challenges. To address these limitations, the Mamba architecture utilizes state-space models (SSMs) for linear scalability, efficient processing, and improved contextual awareness. This paper investigates Mamba architecture for visual domain applications and its recent advancements, including Vision Mamba (ViM) and VideoMamba, which introduce bidirectional scanning, selective scanning mechanisms, and spatiotemporal processing to enhance image and video understanding. Architectural innovations like position embeddings, cross-scan modules, and hierarchical designs further optimize the Mamba framework for global and local feature extraction. These advancements position Mamba as a promising architecture in computer vision research and applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07161v1-abstract-full').style.display = 'none'; document.getElementById('2502.07161v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06061">arXiv:2502.06061</a> <span> [<a href="https://arxiv.org/pdf/2502.06061">pdf</a>, <a href="https://arxiv.org/format/2502.06061">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Online Reward-Weighted Fine-Tuning of Flow Matching with Wasserstein Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jiajun Fan</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+S">Shuaike Shen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+C">Chaoran Cheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxin Chen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C">Chumeng Liang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Ge Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06061v1-abstract-short" style="display: inline;"> Recent advancements in reinforcement learning (RL) have achieved great success in fine-tuning diffusion-based generative models. 
However, fine-tuning continuous flow-based generative models to align with arbitrary user-defined reward functions remains challenging, particularly due to issues such as policy collapse from overoptimization and the prohibitively high computational cost of likelihoods i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06061v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06061v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06061v1-abstract-full" style="display: none;"> Recent advancements in reinforcement learning (RL) have achieved great success in fine-tuning diffusion-based generative models. However, fine-tuning continuous flow-based generative models to align with arbitrary user-defined reward functions remains challenging, particularly due to issues such as policy collapse from overoptimization and the prohibitively high computational cost of likelihoods in continuous-time flows. In this paper, we propose an easy-to-use and theoretically sound RL fine-tuning method, which we term Online Reward-Weighted Conditional Flow Matching with Wasserstein-2 Regularization (ORW-CFM-W2). Our method integrates RL into the flow matching framework to fine-tune generative models with arbitrary reward functions, without relying on gradients of rewards or filtered datasets. By introducing an online reward-weighting mechanism, our approach guides the model to prioritize high-reward regions in the data manifold. To prevent policy collapse and maintain diversity, we incorporate Wasserstein-2 (W2) distance regularization into our method and derive a tractable upper bound for it in flow matching, effectively balancing exploration and exploitation of policy optimization. 
We provide theoretical analyses to demonstrate the convergence properties and induced data distributions of our method, establishing connections with traditional RL algorithms featuring Kullback-Leibler (KL) regularization and offering a more comprehensive understanding of the underlying mechanisms and learning behavior of our approach. Extensive experiments on tasks including target image generation, image compression, and text-image alignment demonstrate the effectiveness of our method, where our method achieves optimal policy convergence while allowing controllable trade-offs between reward maximization and diversity preservation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06061v1-abstract-full').style.display = 'none'; document.getElementById('2502.06061v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">61 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05675">arXiv:2502.05675</a> <span> [<a href="https://arxiv.org/pdf/2502.05675">pdf</a>, <a href="https://arxiv.org/format/2502.05675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Investigating the Shortcomings of LLMs in Step-by-Step Legal Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mishra%2C+V">Venkatesh Mishra</a>, <a href="/search/cs?searchtype=author&query=Pathiraja%2C+B">Bimsara Pathiraja</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Chidananda%2C+S">Sat Chidananda</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+J">Jayanth Srinivasa</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Payani%2C+A">Ali Payani</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05675v1-abstract-short" style="display: inline;"> Reasoning abilities of LLMs have been a key focus in recent years. One challenging reasoning domain with interesting nuances is legal reasoning, which requires careful application of rules, and precedents while balancing deductive and analogical reasoning, and conflicts between rules. 
Although there have been a few works on using LLMs for legal reasoning, their focus has been on overall accuracy.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05675v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05675v1-abstract-full" style="display: none;"> Reasoning abilities of LLMs have been a key focus in recent years. One challenging reasoning domain with interesting nuances is legal reasoning, which requires careful application of rules, and precedents while balancing deductive and analogical reasoning, and conflicts between rules. Although there have been a few works on using LLMs for legal reasoning, their focus has been on overall accuracy. In this paper, we dig deeper to do a step-by-step analysis and figure out where they commit errors. We use the college-level Multiple Choice Question-Answering (MCQA) task from the <i>Civil Procedure</i> dataset and propose a new error taxonomy derived from initial manual analysis of reasoning chains with respect to several LLMs, including two objective measures: soundness and correctness scores. We then develop an LLM-based automated evaluation framework to identify reasoning errors and evaluate the performance of LLMs. The computation of soundness and correctness on the dataset using the auto-evaluator framework reveals several interesting insights. Furthermore, we show that incorporating the error taxonomy as feedback in popular prompting techniques marginally increases LLM performance. Our work will also serve as an evaluation framework that can be used in detailed error analysis of reasoning chains for logic-intensive complex tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05675v1-abstract-full').style.display = 'none'; document.getElementById('2502.05675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2025 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05605">arXiv:2502.05605</a> <span> [<a href="https://arxiv.org/pdf/2502.05605">pdf</a>, <a href="https://arxiv.org/format/2502.05605">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ARIES: Stimulating Self-Refinement of Large Language Models by Iterative Preference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yongcheng Zeng</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+X">Xinyu Cui</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xuanfa Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zexu Sun</a>, <a href="/search/cs?searchtype=author&query=He%2C+Q">Quan He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+N">Ning Yang</a>, <a 
href="/search/cs?searchtype=author&query=Hao%2C+J">Jianye Hao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haifeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05605v1-abstract-short" style="display: inline;"> A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05605v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05605v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05605v1-abstract-full" style="display: none;"> A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model performance during inference. To this end, we introduce a novel post-training and inference framework, called ARIES: Adaptive Refinement and Iterative Enhancement Structure. This method iteratively performs preference training and self-refinement-based data collection. 
During training, ARIES strengthens the model's direct question-answering capability while simultaneously unlocking its self-refinement potential. During inference, ARIES harnesses this self-refinement capability to generate a series of progressively refined responses, which are then filtered using either the Reward Model Scoring or a simple yet effective Rule-Based Selection mechanism, specifically tailored to our approach, to construct a dataset for the next round of preference training. Experimental results demonstrate the remarkable performance of ARIES. When applied to the Llama-3.1-8B model and under the self-refinement setting, ARIES surpasses powerful models such as GPT-4o, achieving 62.3% length-controlled (LC) and 63.3% raw win rates on AlpacaEval 2, outperforming Iterative DPO by 27.8% and 35.5% respectively, as well as a 50.3% win rate on Arena-Hard, surpassing Iterative DPO by 26.6%. Furthermore, ARIES consistently enhances performance on mathematical reasoning tasks like GSM8K and MATH. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05605v1-abstract-full').style.display = 'none'; document.getElementById('2502.05605v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05107">arXiv:2502.05107</a> <span> [<a href="https://arxiv.org/pdf/2502.05107">pdf</a>, <a href="https://arxiv.org/format/2502.05107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> 3DMolFormer: A Dual-channel Framework for Structure-based Drug Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Can Chen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05107v1-abstract-short" style="display: inline;"> Structure-based drug discovery, encompassing the tasks of protein-ligand docking and pocket-aware 3D drug design, represents a core challenge in drug discovery. However, no existing work can deal with both tasks to effectively leverage the duality between them, and current methods for each task are hindered by challenges in modeling 3D information and the limitations of available data. 
To address… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05107v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05107v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05107v1-abstract-full" style="display: none;"> Structure-based drug discovery, encompassing the tasks of protein-ligand docking and pocket-aware 3D drug design, represents a core challenge in drug discovery. However, no existing work can deal with both tasks to effectively leverage the duality between them, and current methods for each task are hindered by challenges in modeling 3D information and the limitations of available data. To address these issues, we propose 3DMolFormer, a unified dual-channel transformer-based framework applicable to both docking and 3D drug design tasks, which exploits their duality by utilizing docking functionalities within the drug design process. Specifically, we represent 3D pocket-ligand complexes using parallel sequences of discrete tokens and continuous numbers, and we design a corresponding dual-channel transformer model to handle this format, thereby overcoming the challenges of 3D information modeling. Additionally, we alleviate data limitations through large-scale pre-training on a mixed dataset, followed by supervised and reinforcement learning fine-tuning techniques respectively tailored for the two tasks. Experimental results demonstrate that 3DMolFormer outperforms previous approaches in both protein-ligand docking and pocket-aware 3D drug design, highlighting its promising application in structure-based drug discovery. The code is available at: https://github.com/HXYfighter/3DMolFormer . 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05107v1-abstract-full').style.display = 'none'; document.getElementById('2502.05107v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03885">arXiv:2502.03885</a> <span> [<a href="https://arxiv.org/pdf/2502.03885">pdf</a>, <a href="https://arxiv.org/format/2502.03885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> InfinitePOD: Building Datacenter-Scale High-Bandwidth Domain for LLM with Optical Circuit Switching Transceivers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shou%2C+C">Chenchen Shou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guyue Liu</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+H">Hao Nie</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Huaiyu Meng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yu Zhou</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yimin Jiang</a>, <a 
href="/search/cs?searchtype=author&query=Lv%2C+W">Wenqing Lv</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yelong Xu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yuanwei Lu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhang Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yanbo Yu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yichen Shen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yibo Zhu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Daxin Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03885v2-abstract-short" style="display: inline;"> Scaling Large Language Model (LLM) training relies on multi-dimensional parallelism, where High-Bandwidth Domains (HBDs) are critical for communication-intensive parallelism like Tensor Parallelism (TP) and Expert Parallelism (EP). However, existing HBD architectures face fundamental limitations in scalability, cost, and fault resiliency: switch-centric HBDs (e.g., NVL-72) incur prohibitive scalin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03885v2-abstract-full').style.display = 'inline'; document.getElementById('2502.03885v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03885v2-abstract-full" style="display: none;"> Scaling Large Language Model (LLM) training relies on multi-dimensional parallelism, where High-Bandwidth Domains (HBDs) are critical for communication-intensive parallelism like Tensor Parallelism (TP) and Expert Parallelism (EP). 
However, existing HBD architectures face fundamental limitations in scalability, cost, and fault resiliency: switch-centric HBDs (e.g., NVL-72) incur prohibitive scaling costs, while GPU-centric HBDs (e.g., TPUv3/Dojo) suffer from severe fault propagation. Switch-GPU hybrid HBDs such as TPUv4 take a middle-ground approach by leveraging Optical Circuit Switches, but the fault explosion radius remains large at the cube level (e.g., 64 TPUs). We propose InfinitePOD, a novel transceiver-centric HBD architecture that unifies connectivity and dynamic switching at the transceiver level using Optical Circuit Switching (OCS). By embedding OCS within each transceiver, InfinitePOD achieves reconfigurable point-to-multipoint connectivity, allowing the topology to adapt into variable-size rings. This design provides: i) datacenter-wide scalability without cost explosion; ii) fault resilience by isolating failures to a single node; and iii) full bandwidth utilization for fault-free GPUs. Key innovations include a Silicon Photonic (SiPh) based low-cost OCS transceiver (OCSTrx), a reconfigurable k-hop ring topology co-designed with intra-/inter-node communication, and an HBD-DCN orchestration algorithm maximizing GPU utilization while minimizing cross-ToR datacenter network traffic. The evaluation demonstrates that InfinitePOD achieves 31% of the cost of NVL-72, near-zero GPU waste ratio (over one order of magnitude lower than NVL-72 and TPUv4), near-zero cross-ToR traffic when node fault ratios are under 7%, and improves Model FLOPs Utilization by 3.37x compared to NVIDIA DGX (8 GPUs per Node). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03885v2-abstract-full').style.display = 'none'; document.getElementById('2502.03885v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02829">arXiv:2502.02829</a> <span> [<a href="https://arxiv.org/pdf/2502.02829">pdf</a>, <a href="https://arxiv.org/format/2502.02829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Global Contact-Rich Planning with Sparsity-Rich Semidefinite Relaxations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+S">Shucheng Kang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guorui Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Heng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02829v2-abstract-short" style="display: inline;"> We show that contact-rich motion planning is also sparsity-rich when viewed as polynomial optimization (POP). 
We can exploit not only the correlative and term sparsity patterns that are general to all POPs, but also specialized sparsity patterns from the robot kinematic structure and the separability of contact modes. Such sparsity enables the design of high-order but sparse semidefinite programmi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02829v2-abstract-full').style.display = 'inline'; document.getElementById('2502.02829v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02829v2-abstract-full" style="display: none;"> We show that contact-rich motion planning is also sparsity-rich when viewed as polynomial optimization (POP). We can exploit not only the correlative and term sparsity patterns that are general to all POPs, but also specialized sparsity patterns from the robot kinematic structure and the separability of contact modes. Such sparsity enables the design of high-order but sparse semidefinite programming (SDPs) relaxations--building upon Lasserre's moment and sums of squares hierarchy--that (i) can be solved in seconds by off-the-shelf SDP solvers, and (ii) compute near globally optimal solutions to the nonconvex contact-rich planning problems with small certified suboptimality. Through extensive experiments both in simulation (Push Bot, Push Box, Push Box with Obstacles, and Planar Hand) and real world (Push T), we demonstrate the power of using convex SDP relaxations to generate global contact-rich motion plans. As a contribution of independent interest, we release the Sparse Polynomial Optimization Toolbox (SPOT)--implemented in C++ with interfaces to both Python and Matlab--that automates sparsity exploitation for robotics and beyond. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02829v2-abstract-full').style.display = 'none'; document.getElementById('2502.02829v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Website: https://computationalrobotics.seas.harvard.edu/project-spot/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16784">arXiv:2501.16784</a> <span> [<a href="https://arxiv.org/pdf/2501.16784">pdf</a>, <a href="https://arxiv.org/format/2501.16784">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> TORCHLIGHT: Shedding LIGHT on Real-World Attacks on Cloudless IoT Devices Concealed within the Tor Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+Y">Yumingzhi Pan</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zhen Ling</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yue Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hongze Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangchi Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+J">Junzhou Luo</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+X">Xinwen Fu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16784v1-abstract-short" style="display: inline;"> The rapidly expanding Internet of Things (IoT) landscape is shifting toward cloudless architectures, removing reliance on centralized cloud services but exposing devices directly to the internet and increasing their vulnerability to cyberattacks. Our research revealed an unexpected pattern of substantial Tor network traffic targeting cloudless IoT devices, suggesting that attackers are using Tor t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16784v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16784v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16784v1-abstract-full" style="display: none;"> The rapidly expanding Internet of Things (IoT) landscape is shifting toward cloudless architectures, removing reliance on centralized cloud services but exposing devices directly to the internet and increasing their vulnerability to cyberattacks. Our research revealed an unexpected pattern of substantial Tor network traffic targeting cloudless IoT devices, suggesting that attackers are using Tor to anonymously exploit undisclosed vulnerabilities (possibly obtained from underground markets). To delve deeper into this phenomenon, we developed TORCHLIGHT, a tool designed to detect both known and unknown threats targeting cloudless IoT devices by analyzing Tor traffic. TORCHLIGHT filters traffic via specific IP patterns, strategically deploys virtual private server (VPS) nodes for cost-effective detection, and uses a chain-of-thought (CoT) process with large language models (LLMs) for accurate threat identification. 
Our results are significant: for the first time, we have demonstrated that attackers are indeed using Tor to conceal their identities while targeting cloudless IoT devices. Over a period of 12 months, TORCHLIGHT analyzed 26 TB of traffic, revealing 45 vulnerabilities, including 29 zero-day exploits with 25 CVE-IDs assigned (5 CRITICAL, 3 HIGH, 16 MEDIUM, and 1 LOW) and an estimated value of approximately $312,000. These vulnerabilities affect around 12.71 million devices across 148 countries, exposing them to severe risks such as information disclosure, authentication bypass, and arbitrary command execution. The findings have attracted significant attention, sparking widespread discussion in cybersecurity circles, reaching the top 25 on Hacker News, and generating over 190,000 views. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16784v1-abstract-full').style.display = 'none'; document.getElementById('2501.16784v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 14 figure, 9 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15415">arXiv:2501.15415</a> <span> [<a href="https://arxiv.org/pdf/2501.15415">pdf</a>, <a href="https://arxiv.org/format/2501.15415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OCSU: Optical Chemical Structure Understanding for Molecule-centric Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+S">Siqi Fan</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuguang Xie</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+B">Bowen Cai</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+A">Ailin Xie</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaochao Liu</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+M">Mu Qiao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+J">Jie Xing</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+Z">Zaiqing Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15415v1-abstract-short" style="display: inline;"> Understanding the chemical structure from a graphical representation of a molecule is a challenging image caption task that would greatly benefit molecule-centric scientific discovery. Variations in molecular images and caption subtasks pose a significant challenge in both image representation learning and task modeling. 
Yet, existing methods only focus on a specific caption task that translates a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15415v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15415v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15415v1-abstract-full" style="display: none;"> Understanding the chemical structure from a graphical representation of a molecule is a challenging image caption task that would greatly benefit molecule-centric scientific discovery. Variations in molecular images and caption subtasks pose a significant challenge in both image representation learning and task modeling. Yet, existing methods only focus on a specific caption task that translates a molecular image into its graph structure, i.e., OCSR. In this paper, we propose the Optical Chemical Structure Understanding (OCSU) task, which extends OCSR to molecular image caption from motif level to molecule level and abstract level. We present two approaches for that, including an OCSR-based method and an end-to-end OCSR-free method. The proposed Double-Check achieves SOTA OCSR performance on real-world patent and journal article scenarios via attentive feature enhancement for local ambiguous atoms. Cascading with SMILES-based molecule understanding methods, it can leverage the power of existing task-specific models for OCSU, while Mol-VL is an end-to-end optimized VLM-based model. An OCSU dataset, Vis-CheBI20, is built based on the widely used CheBI20 dataset for training and evaluation. Extensive experimental results on Vis-CheBI20 demonstrate the effectiveness of the proposed approaches. Improving OCSR capability can lead to better OCSU performance for the OCSR-based approach, and the SOTA performance of Mol-VL demonstrates the great potential of the end-to-end approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15415v1-abstract-full').style.display = 'none'; document.getElementById('2501.15415v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14818">arXiv:2501.14818</a> <span> [<a href="https://arxiv.org/pdf/2501.14818">pdf</a>, <a href="https://arxiv.org/format/2501.14818">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Eagle 2: Building Post-Training Data Strategies from Scratch for Frontier Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhiqi Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+G">Guo Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shilong Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shihao Wang</a>, <a href="/search/cs?searchtype=author&query=VS%2C+V">Vibashan VS</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+Y">Yishen Ji</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+S">Shiyi Lan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yilin Zhao</a>, <a 
href="/search/cs?searchtype=author&query=Radhakrishnan%2C+S">Subhashree Radhakrishnan</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+N">Nadine Chang</a>, <a href="/search/cs?searchtype=author&query=Sapra%2C+K">Karan Sapra</a>, <a href="/search/cs?searchtype=author&query=Deshmukh%2C+A+S">Amala Sanjay Deshmukh</a>, <a href="/search/cs?searchtype=author&query=Rintamaki%2C+T">Tuomas Rintamaki</a>, <a href="/search/cs?searchtype=author&query=Le%2C+M">Matthieu Le</a>, <a href="/search/cs?searchtype=author&query=Karmanov%2C+I">Ilia Karmanov</a>, <a href="/search/cs?searchtype=author&query=Voegtle%2C+L">Lukas Voegtle</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+P">Philipp Fischer</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">De-An Huang</a>, <a href="/search/cs?searchtype=author&query=Roman%2C+T">Timo Roman</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+T">Tong Lu</a>, <a href="/search/cs?searchtype=author&query=Alvarez%2C+J+M">Jose M. Alvarez</a>, <a href="/search/cs?searchtype=author&query=Catanzaro%2C+B">Bryan Catanzaro</a>, <a href="/search/cs?searchtype=author&query=Kautz%2C+J">Jan Kautz</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+A">Andrew Tao</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14818v1-abstract-short" style="display: inline;"> Recently, promising progress has been made by open-source vision-language models (VLMs) in bringing their capabilities closer to those of proprietary frontier models. However, most open-source models only publish their final model weights, leaving the critical details of data strategies and implementation largely opaque. 
In this work, we address VLM post-training from a data-centric perspective, s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14818v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14818v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14818v1-abstract-full" style="display: none;"> Recently, promising progress has been made by open-source vision-language models (VLMs) in bringing their capabilities closer to those of proprietary frontier models. However, most open-source models only publish their final model weights, leaving the critical details of data strategies and implementation largely opaque. In this work, we address VLM post-training from a data-centric perspective, showing the key role of data strategy in developing frontier VLMs. By studying and building our post-training data strategy from scratch, we share detailed insights into the development processes, aiming to benefit the development of competitive models for the open-source community. Our introduced data strategy, together with training recipes and model design, leads to a family of performant VLMs named Eagle2. Specifically, Eagle2-9B achieves state-of-the-art results across various multimodal benchmarks, matching certain competitive models with up to 70B parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14818v1-abstract-full').style.display = 'none'; document.getElementById('2501.14818v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14208">arXiv:2501.14208</a> <span> [<a href="https://arxiv.org/pdf/2501.14208">pdf</a>, <a href="https://arxiv.org/format/2501.14208">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> You Only Teach Once: Learn One-Shot Bimanual Robotic Manipulation from Video Demonstrations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Huayi Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruixiang Wang</a>, <a href="/search/cs?searchtype=author&query=Tai%2C+Y">Yunxin Tai</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Y">Yueci Deng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guiliang Liu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+K">Kui Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14208v1-abstract-short" style="display: inline;"> Bimanual robotic manipulation is a long-standing challenge of embodied intelligence due to its characteristics of dual-arm spatial-temporal coordination and high-dimensional action spaces. Previous studies rely on pre-defined action taxonomies or direct teleoperation to alleviate or circumvent these issues, often making them lack simplicity, versatility and scalability. 
Differently, we believe tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14208v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14208v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14208v1-abstract-full" style="display: none;"> Bimanual robotic manipulation is a long-standing challenge of embodied intelligence due to its characteristics of dual-arm spatial-temporal coordination and high-dimensional action spaces. Previous studies rely on pre-defined action taxonomies or direct teleoperation to alleviate or circumvent these issues, often making them lack simplicity, versatility and scalability. Differently, we believe that the most effective and efficient way for teaching bimanual manipulation is learning from human demonstrated videos, where rich features such as spatial-temporal positions, dynamic postures, interaction states and dexterous transitions are available almost for free. In this work, we propose the YOTO (You Only Teach Once), which can extract and then inject patterns of bimanual actions from as few as a single binocular observation of hand movements, and teach dual robot arms various complex tasks. Furthermore, based on keyframes-based motion trajectories, we devise a subtle solution for rapidly generating training demonstrations with diverse variations of manipulated objects and their locations. These data can then be used to learn a customized bimanual diffusion policy (BiDP) across diverse scenes. In experiments, YOTO achieves impressive performance in mimicking 5 intricate long-horizon bimanual tasks, possesses strong generalization under different visual and spatial conditions, and outperforms existing visuomotor imitation learning methods in accuracy and efficiency. Our project link is https://hnuzhy.github.io/projects/YOTO. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14208v1-abstract-full').style.display = 'none'; document.getElementById('2501.14208v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13918">arXiv:2501.13918</a> <span> [<a href="https://arxiv.org/pdf/2501.13918">pdf</a>, <a href="https://arxiv.org/format/2501.13918">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Video Generation with Human Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gongye Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jiajun Liang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Ziyang Yuan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaokun Liu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+M">Mingwu Zheng</a>, <a 
href="/search/cs?searchtype=author&query=Wu%2C+X">Xiele Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiulin Wang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+W">Wenyu Qin</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+M">Menghan Xia</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xintao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaohong Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Fei Yang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+P">Pengfei Wan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Di Zhang</a>, <a href="/search/cs?searchtype=author&query=Gai%2C+K">Kun Gai</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yujiu Yang</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13918v1-abstract-short" style="display: inline;"> Video generation has achieved significant advances through rectified flow techniques, but issues like unsmooth motion and misalignment between videos and prompts persist. In this work, we develop a systematic pipeline that harnesses human feedback to mitigate these problems and refine the video generation model. Specifically, we begin by constructing a large-scale human preference dataset focused… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13918v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13918v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13918v1-abstract-full" style="display: none;"> Video generation has achieved significant advances through rectified flow techniques, but issues like unsmooth motion and misalignment between videos and prompts persist. 
In this work, we develop a systematic pipeline that harnesses human feedback to mitigate these problems and refine the video generation model. Specifically, we begin by constructing a large-scale human preference dataset focused on modern video generation models, incorporating pairwise annotations across multi-dimensions. We then introduce VideoReward, a multi-dimensional video reward model, and examine how annotations and various design choices impact its rewarding efficacy. From a unified reinforcement learning perspective aimed at maximizing reward with KL regularization, we introduce three alignment algorithms for flow-based models by extending those from diffusion models. These include two training-time strategies: direct preference optimization for flow (Flow-DPO) and reward weighted regression for flow (Flow-RWR), and an inference-time technique, Flow-NRG, which applies reward guidance directly to noisy videos. Experimental results indicate that VideoReward significantly outperforms existing reward models, and Flow-DPO demonstrates superior performance compared to both Flow-RWR and standard supervised fine-tuning methods. Additionally, Flow-NRG lets users assign custom weights to multiple objectives during inference, meeting personalized video quality needs. Project page: https://gongyeliu.github.io/videoalign. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13918v1-abstract-full').style.display = 'none'; document.getElementById('2501.13918v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13467">arXiv:2501.13467</a> <span> [<a href="https://arxiv.org/pdf/2501.13467">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multi-Level Attention and Contrastive Learning for Enhanced Text Classification with an Optimized Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jia Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guiran Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Binrong Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Shicheng Zhou</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Hongye Zheng</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+X">Xiaoxuan Liao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13467v1-abstract-short" style="display: inline;"> This paper studies a text classification algorithm based on an improved Transformer to improve the performance and efficiency of the model in text classification tasks. 
Aiming at the shortcomings of the traditional Transformer model in capturing deep semantic relationships and optimizing computational complexity, this paper introduces a multi-level attention mechanism and a contrastive learning st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13467v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13467v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13467v1-abstract-full" style="display: none;"> This paper studies a text classification algorithm based on an improved Transformer to improve the performance and efficiency of the model in text classification tasks. Aiming at the shortcomings of the traditional Transformer model in capturing deep semantic relationships and optimizing computational complexity, this paper introduces a multi-level attention mechanism and a contrastive learning strategy. The multi-level attention mechanism effectively models the global semantics and local features in the text by combining global attention with local attention; the contrastive learning strategy enhances the model's ability to distinguish between different categories by constructing positive and negative sample pairs while improving the classification effect. In addition, in order to improve the training and inference efficiency of the model on large-scale text data, this paper designs a lightweight module to optimize the feature transformation process and reduce the computational cost. Experimental results on the dataset show that the improved Transformer model outperforms the comparative models such as BiLSTM, CNN, standard Transformer, and BERT in terms of classification accuracy, F1 score, and recall rate, showing stronger semantic representation ability and generalization performance. 
The method proposed in this paper provides a new idea for algorithm optimization in the field of text classification and has good application potential and practical value. Future work will focus on studying the performance of this model in multi-category imbalanced datasets and cross-domain tasks and explore the integration wi <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13467v1-abstract-full').style.display = 'none'; document.getElementById('2501.13467v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09950">arXiv:2501.09950</a> <span> [<a href="https://arxiv.org/pdf/2501.09950">pdf</a>, <a href="https://arxiv.org/format/2501.09950">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sympathy over Polarization: A Computational Discourse Analysis of Social Media Posts about the July 2024 Trump Assassination Attempt </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingcheng Zeng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanhong Liu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Z">Zhaoqian Xue</a>, <a href="/search/cs?searchtype=author&query=Ford%2C+D">Diego Ford</a>, <a href="/search/cs?searchtype=author&query=Voigt%2C+R">Rob Voigt</a>, <a 
href="/search/cs?searchtype=author&query=Hagen%2C+L">Loni Hagen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingyao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09950v1-abstract-short" style="display: inline;"> On July 13, 2024, at the Trump rally in Pennsylvania, someone attempted to assassinate Republican Presidential Candidate Donald Trump. This attempt sparked a large-scale discussion on social media. We collected posts from X (formerly known as Twitter) one week before and after the assassination attempt and aimed to model the short-term effects of such a ``shock'' on public opinions and discussion… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09950v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09950v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09950v1-abstract-full" style="display: none;"> On July 13, 2024, at the Trump rally in Pennsylvania, someone attempted to assassinate Republican Presidential Candidate Donald Trump. This attempt sparked a large-scale discussion on social media. We collected posts from X (formerly known as Twitter) one week before and after the assassination attempt and aimed to model the short-term effects of such a ``shock'' on public opinions and discussion topics. Specifically, our study addresses three key questions: first, we investigate how public sentiment toward Donald Trump shifts over time and across regions (RQ1) and examine whether the assassination attempt itself significantly affects public attitudes, independent of the existing political alignments (RQ2). 
Finally, we explore the major themes in online conversations before and after the crisis, illustrating how discussion topics evolved in response to this politically charged event (RQ3). By integrating large language model-based sentiment analysis, difference-in-differences modeling, and topic modeling techniques, we find that following the attempt the public response was broadly sympathetic to Trump rather than polarizing, despite baseline ideological and regional disparities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09950v1-abstract-full').style.display = 'none'; document.getElementById('2501.09950v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09391">arXiv:2501.09391</a> <span> [<a href="https://arxiv.org/pdf/2501.09391">pdf</a>, <a href="https://arxiv.org/format/2501.09391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09391v1-abstract-short" style="display: inline;"> The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant chall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09391v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09391v1-abstract-full" style="display: none;"> The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant challenges due to the limited computing resources of edge devices and the dynamic nature of wireless networks. This paper proposes a novel framework that integrates contract-inspired contest theory, Deep Reinforcement Learning (DRL), and GDMs to optimize image generation in these resource-constrained environments. The framework addresses the critical challenges of resource allocation and semantic data transmission quality by incentivizing edge devices to efficiently transmit high-quality semantic data, which is essential for creating realistic and immersive images. 
The use of contest and contract theory ensures that edge devices are motivated to allocate resources effectively, while DRL dynamically adjusts to network conditions, optimizing the overall image generation process. Experimental results demonstrate that the proposed approach not only improves the quality of generated images but also achieves superior convergence speed and stability compared to traditional methods. This makes the framework particularly effective for optimizing complex resource allocation tasks in mobile edge Metaverse applications, offering enhanced performance and efficiency in creating immersive virtual environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09391v1-abstract-full').style.display = 'none'; document.getElementById('2501.09391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09383">arXiv:2501.09383</a> <span> [<a href="https://arxiv.org/pdf/2501.09383">pdf</a>, <a href="https://arxiv.org/format/2501.09383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Contextual Caching for Mobile Edge Large Language Model Service </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09383v1-abstract-short" style="display: inline;"> Mobile edge Large Language Model (LLM) deployments face inherent constraints, such as limited computational resources and network bandwidth. Although Retrieval-Augmented Generation (RAG) mitigates some challenges by integrating external knowledge bases, inefficient cache management can still result in high retrieval latency and frequent cache updates.
To address these issues, we propose an Adaptiv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09383v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09383v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09383v1-abstract-full" style="display: none;"> Mobile edge Large Language Model (LLM) deployments face inherent constraints, such as limited computational resources and network bandwidth. Although Retrieval-Augmented Generation (RAG) mitigates some challenges by integrating external knowledge bases, inefficient cache management can still result in high retrieval latency and frequent cache updates. To address these issues, we propose an Adaptive Contextual Caching (ACC) framework that anticipates user needs by proactively caching semantically relevant data for mobile-edge LLMs. ACC utilizes a deep reinforcement learning (DRL) module to refine cache replacement policies, balancing user context, document similarity, and the overhead associated with cache misses. Experimental results demonstrate that ACC increases cache hit rates to over 80\% after only 11 training episodes, outperforming FIFO, LRU, and semantic-only caching while reducing retrieval latency by up to 40\%. In particular, ACC also reduces local caching overhead (i.e., the cost of updating the cache when a miss occurs) by as much as 55\%, enabling scalable, low-latency LLM services in resource-constrained edge environments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09383v1-abstract-full').style.display = 'none'; document.getElementById('2501.09383v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08572">arXiv:2501.08572</a> <span> [<a href="https://arxiv.org/pdf/2501.08572">pdf</a>, <a href="https://arxiv.org/format/2501.08572">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> DNMDR: Dynamic Networks and Multi-view Drug Representations for Safe Medication Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanlin Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaomei Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zihao Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xue Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xingxu Fan</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xiangwei Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2501.08572v1-abstract-short" style="display: inline;"> Medication Recommendation (MR) is a promising research topic which booms diverse applications in the healthcare and clinical domains. However, existing methods mainly rely on sequential modeling and static graphs for representation learning, which ignore the dynamic correlations in diverse medical events of a patient's temporal visits, leading to insufficient global structural exploration on nodes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08572v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08572v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08572v1-abstract-full" style="display: none;"> Medication Recommendation (MR) is a promising research topic which booms diverse applications in the healthcare and clinical domains. However, existing methods mainly rely on sequential modeling and static graphs for representation learning, which ignore the dynamic correlations in diverse medical events of a patient's temporal visits, leading to insufficient global structural exploration on nodes. Additionally, mitigating drug-drug interactions (DDIs) is another issue determining the utility of the MR systems. To address the challenges mentioned above, this paper proposes a novel MR method with the integration of dynamic networks and multi-view drug representations (DNMDR). Specifically, weighted snapshot sequences for dynamic heterogeneous networks are constructed based on discrete visits in temporal EHRs, and all the dynamic networks are jointly trained to gain both structural correlations in diverse medical events and temporal dependency in historical health conditions, for achieving comprehensive patient representations with both semantic features and structural relationships. 
Moreover, combining the drug co-occurrences and adverse drug-drug interactions (DDIs) in internal view of drug molecule structure and interactive view of drug pairs, the safe drug representations are available to obtain high-quality medication combination recommendation. Finally, extensive experiments on real world datasets are conducted for performance evaluation, and the experimental results demonstrate that the proposed DNMDR method outperforms the state-of-the-art baseline models with a large margin on various metrics such as PRAUC, Jaccard, DDI rates and so on. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08572v1-abstract-full').style.display = 'none'; document.getElementById('2501.08572v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05884">arXiv:2501.05884</a> <span> [<a href="https://arxiv.org/pdf/2501.05884">pdf</a>, <a href="https://arxiv.org/format/2501.05884">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Text-to-Edit: Controllable End-to-End Video Ad Creation via Multimodal LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+D">Dabing Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haosen Zhan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xingchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guisheng Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zemin Li</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jinghui Xie</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhao Song</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Weiguo Feng</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Bingyue Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05884v1-abstract-short" style="display: inline;"> The exponential growth of short-video content has ignited a surge in the necessity for efficient, automated solutions to video editing, with challenges arising from the need to understand videos and tailor the editing according to user requirements. 
Addressing this need, we propose an innovative end-to-end foundational framework, ultimately actualizing precise control over the final video content… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05884v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05884v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05884v1-abstract-full" style="display: none;"> The exponential growth of short-video content has ignited a surge in the necessity for efficient, automated solutions to video editing, with challenges arising from the need to understand videos and tailor the editing according to user requirements. Addressing this need, we propose an innovative end-to-end foundational framework, ultimately actualizing precise control over the final video content editing. Leveraging the flexibility and generalizability of Multimodal Large Language Models (MLLMs), we defined clear input-output mappings for efficient video creation. To bolster the model's capability in processing and comprehending video content, we introduce a strategic combination of a denser frame rate and a slow-fast processing technique, significantly enhancing the extraction and understanding of both temporal and spatial video information. Furthermore, we introduce a text-to-edit mechanism that allows users to achieve desired video outcomes through textual input, thereby enhancing the quality and controllability of the edited videos. Through comprehensive experimentation, our method has not only showcased significant effectiveness within advertising datasets, but also yields universally applicable conclusions on public datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05884v1-abstract-full').style.display = 'none'; document.getElementById('2501.05884v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05411">arXiv:2501.05411</a> <span> [<a href="https://arxiv.org/pdf/2501.05411">pdf</a>, <a href="https://arxiv.org/format/2501.05411">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Path-Planning for Autonomous Robots: A UCH-Enhanced Q-Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wei Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruiyang Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haonan Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05411v1-abstract-short" style="display: inline;"> Q-learning methods are widely used in robot path planning but often face challenges of inefficient search and slow convergence. We propose an Improved Q-learning (IQL) framework that enhances standard Q-learning in two significant ways.
First, we introduce the Path Adaptive Collaborative Optimization (PACO) algorithm to optimize Q-table initialization, providing better initial estimates and accele… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05411v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05411v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05411v1-abstract-full" style="display: none;"> Q-learning methods are widely used in robot path planning but often face challenges of inefficient search and slow convergence. We propose an Improved Q-learning (IQL) framework that enhances standard Q-learning in two significant ways. First, we introduce the Path Adaptive Collaborative Optimization (PACO) algorithm to optimize Q-table initialization, providing better initial estimates and accelerating learning. Second, we incorporate a Utility-Controlled Heuristic (UCH) mechanism with dynamically tuned parameters to optimize the reward function, enhancing the algorithm's accuracy and effectiveness in path-planning tasks. Extensive experiments in three different raster grid environments validate the superior performance of our IQL framework. The results demonstrate that our IQL algorithm outperforms existing methods, including FIQL, PP-QL-based CPP, DFQL, and QMABC algorithms, in terms of path-planning capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05411v1-abstract-full').style.display = 'none'; document.getElementById('2501.05411v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 20 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE, 2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04314">arXiv:2501.04314</a> <span> [<a href="https://arxiv.org/pdf/2501.04314">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Molecular HDD Logic for Encrypted Massive Data Storage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+B">Bingjie Guo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinhui Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+A">An Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinxin Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wuhong Xue</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhixin Wu</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+X">Xiaolong Zhong</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+J">Jianmin Zeng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jinjin Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mao Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaohong Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yu Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gang Liu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04314v1-abstract-short" style="display: inline;"> Organic memories, with small dimension, fast speed and long retention features, are considered as promising candidates for massive data archiving. In order to satisfy the requirements for ultra-low power and high-security information storage, we design a conceptual molecular hard-disk (HDD) logic scheme that is capable to execute in-situ encryption of massive data in pW/bit power-consumption ran… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04314v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04314v1-abstract-full" style="display: none;"> Organic memories, with small dimension, fast speed and long retention features, are considered as promising candidates for massive data archiving. In order to satisfy the requirements for ultra-low power and high-security information storage, we design a conceptual molecular hard-disk (HDD) logic scheme that is capable to execute in-situ encryption of massive data in pW/bit power-consumption range. Beneficial from the coupled mechanism of counter-balanced redox reaction and local ion drifting, the basic HDD unit consisting of ~ 200 self-assembled RuXLPH molecules in a monolayer (SAM) configuration undergoes unique conductance modulation with continuous, symmetric and low-power switching characteristics. 96-state memory performance, which allows 6-bit data storage and single-unit one-step XOR operation, is realized in the RuXLPH SAM sample. Through single-unit XOR manipulation of the pixel information, in-situ bitwise encryption of the Mogao Grottoes mural images stored in the molecular HDD is demonstrated.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04314v1-abstract-full').style.display = 'none'; document.getElementById('2501.04314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04285">arXiv:2501.04285</a> <span> [<a href="https://arxiv.org/pdf/2501.04285">pdf</a>, <a href="https://arxiv.org/format/2501.04285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Separate Source Channel Coding Is Still What You Need: An LLM-based Rethinking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+T">Tianqi Ren</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rongpeng Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+M">Ming-min Zhao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xianfu Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyi Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhifeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Honggang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04285v1-abstract-short" style="display: inline;"> Along with 
the proliferating research interest in Semantic Communication (SemCom), Joint Source Channel Coding (JSCC) has dominated the attention due to the widely assumed existence in efficiently delivering information semantics. %has emerged as a pivotal area of research, aiming to enhance the efficiency and reliability of information transmission through deep learning-based methods. Nevertheles… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04285v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04285v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04285v1-abstract-full" style="display: none;"> Along with the proliferating research interest in Semantic Communication (SemCom), Joint Source Channel Coding (JSCC) has dominated the attention due to the widely assumed existence in efficiently delivering information semantics. %has emerged as a pivotal area of research, aiming to enhance the efficiency and reliability of information transmission through deep learning-based methods. Nevertheless, this paper challenges the conventional JSCC paradigm, and advocates for adoption of Separate Source Channel Coding (SSCC) to enjoy the underlying more degree of freedom for optimization. We demonstrate that SSCC, after leveraging the strengths of Large Language Model (LLM) for source coding and Error Correction Code Transformer (ECCT) complemented for channel decoding, offers superior performance over JSCC. Our proposed framework also effectively highlights the compatibility challenges between SemCom approaches and digital communication systems, particularly concerning the resource costs associated with the transmission of high precision floating point numbers. 
Through comprehensive evaluations, we establish that empowered by LLM-based compression and ECCT-enhanced error correction, SSCC remains a viable and effective solution for modern communication systems. In other words, separate source and channel coding is still what we need! <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04285v1-abstract-full').style.display = 'none'; document.getElementById('2501.04285v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03905">arXiv:2501.03905</a> <span> [<a href="https://arxiv.org/pdf/2501.03905">pdf</a>, <a href="https://arxiv.org/format/2501.03905">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> mFabric: An Efficient and Scalable Fabric for Mixture-of-Experts Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liao%2C+X">Xudong Liao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yijun Sun</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+H">Han Tian</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+X">Xinchen Wan</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yilun Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zilong Wang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Z">Zhenghang Ren</a>, <a 
href="/search/cs?searchtype=author&query=Huang%2C+X">Xinyang Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenxue Li</a>, <a href="/search/cs?searchtype=author&query=Tse%2C+K+F">Kin Fai Tse</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+Z">Zhizhen Zhong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guyue Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Ying Zhang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xiaofeng Ye</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiming Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03905v1-abstract-short" style="display: inline;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. In this paper, we advocate for a first-of-its… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03905v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03905v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03905v1-abstract-full" style="display: none;"> Mixture-of-Expert (MoE) models outperform conventional models by selectively activating different subnets, named \emph{experts}, on a per-token basis. This gated computation generates dynamic communications that cannot be determined beforehand, challenging the existing GPU interconnects that remain \emph{static} during the distributed training process. 
In this paper, we advocate for a first-of-its-kind system, called mFabric, that unlocks topology reconfiguration \emph{during} distributed MoE training. Towards this vision, we first perform a production measurement study and show that the MoE dynamic communication pattern has \emph{strong locality}, alleviating the requirement of global reconfiguration. Based on this, we design and implement a \emph{regionally reconfigurable high-bandwidth domain} on top of existing electrical interconnects using optical circuit switching (OCS), achieving scalability while maintaining rapid adaptability. We have built a fully functional mFabric prototype with commodity hardware and a customized collective communication runtime that trains state-of-the-art MoE models with \emph{in-training} topology reconfiguration across 32 A100 GPUs. Large-scale packet-level simulations show that mFabric delivers comparable performance as the non-blocking fat-tree fabric while boosting the training cost efficiency (e.g., performance per dollar) of four representative MoE models by 1.2$\times$--1.5$\times$ and 1.9$\times$--2.3$\times$ at 100 Gbps and 400 Gbps link bandwidths, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03905v1-abstract-full').style.display = 'none'; document.getElementById('2501.03905v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Corresponding authors: zhizhenz@mit.edu (Z. Zhong), kaichen@cse.ust.hk (K. 
Chen)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02195">arXiv:2501.02195</a> <span> [<a href="https://arxiv.org/pdf/2501.02195">pdf</a>, <a href="https://arxiv.org/format/2501.02195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> An Optimal Algorithm for Half-plane Hitting Set </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gang Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haitao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02195v1-abstract-short" style="display: inline;"> Given a set $ P $ of $n$ points and a set $ H $ of $n$ half-planes in the plane, we consider the problem of computing a smallest subset of points such that each half-plane contains at least one point of the subset. The previously best algorithm solves the problem in $O(n^3 \log n)$ time. It is also known that $Ω(n \log n)$ is a lower bound for the problem under the algebraic decision tree model. 
I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02195v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02195v1-abstract-full" style="display: none;"> Given a set $ P $ of $n$ points and a set $ H $ of $n$ half-planes in the plane, we consider the problem of computing a smallest subset of points such that each half-plane contains at least one point of the subset. The previously best algorithm solves the problem in $O(n^3 \log n)$ time. It is also known that $Ω(n \log n)$ is a lower bound for the problem under the algebraic decision tree model. In this paper, we present an $O(n \log n)$ time algorithm, which matches the lower bound and thus is optimal. Another virtue of the algorithm is that it is relatively simple. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02195v1-abstract-full').style.display = 'none'; document.getElementById('2501.02195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in SOSA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01149">arXiv:2501.01149</a> <span> [<a href="https://arxiv.org/pdf/2501.01149">pdf</a>, <a href="https://arxiv.org/format/2501.01149">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A3: Android Agent Arena for Mobile GUI Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chai%2C+Y">Yuxiang Chai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hanhao Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiayu Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liang Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyi Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guozhi Wang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Shuai Ren</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Siyuan Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongsheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01149v2-abstract-short" style="display: inline;"> AI agents have become increasingly prevalent in recent years, driven by significant advancements in the field of large language models (LLMs). Mobile GUI agents, a subset of AI agents, are designed to autonomously perform tasks on mobile devices. 
While numerous studies have introduced agents, datasets, and benchmarks to advance mobile GUI agent research, many existing datasets focus on static fram… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01149v2-abstract-full').style.display = 'inline'; document.getElementById('2501.01149v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01149v2-abstract-full" style="display: none;"> AI agents have become increasingly prevalent in recent years, driven by significant advancements in the field of large language models (LLMs). Mobile GUI agents, a subset of AI agents, are designed to autonomously perform tasks on mobile devices. While numerous studies have introduced agents, datasets, and benchmarks to advance mobile GUI agent research, many existing datasets focus on static frame evaluations and fail to provide a comprehensive platform for assessing performance on real-world, in-the-wild tasks. To address this gap, we present Android Agent Arena (A3), a novel evaluation platform. Unlike existing in-the-wild systems, A3 offers: (1) meaningful and practical tasks, such as real-time online information retrieval and operational instructions; (2) a larger, more flexible action space, enabling compatibility with agents trained on any dataset; and (3) automated business-level LLM-based evaluation process. A3 includes 21 widely used general third-party apps and 201 tasks representative of common user scenarios, providing a robust foundation for evaluating mobile GUI agents in real-world situations and a new autonomous evaluation process for less human labor and coding expertise. The project is available at https://yuxiangchai.github.io/Android-Agent-Arena/. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01149v2-abstract-full').style.display = 'none'; document.getElementById('2501.01149v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01124">arXiv:2501.01124</a> <span> [<a href="https://arxiv.org/pdf/2501.01124">pdf</a>, <a href="https://arxiv.org/format/2501.01124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Graph2text or Graph2token: A Perspective of Large Language Models for Graph Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shuo Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yingbo Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruolin Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guchun Liu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yanming Shen</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+S">Shaoxiong Ji</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bowen Li</a>, <a href="/search/cs?searchtype=author&query=Han%2C+F">Fengling Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiuzhen Zhang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+F">Feng Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2501.01124v1-abstract-short" style="display: inline;"> Graphs are data structures used to represent irregular networks and are prevalent in numerous real-world applications. Previous methods directly model graph structures and achieve significant success. However, these methods encounter bottlenecks due to the inherent irregularity of graphs. An innovative solution is converting graphs into textual representations, thereby harnessing the powerful capa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01124v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01124v1-abstract-full" style="display: none;"> Graphs are data structures used to represent irregular networks and are prevalent in numerous real-world applications. Previous methods directly model graph structures and achieve significant success. However, these methods encounter bottlenecks due to the inherent irregularity of graphs. An innovative solution is converting graphs into textual representations, thereby harnessing the powerful capabilities of Large Language Models (LLMs) to process and comprehend graphs. In this paper, we present a comprehensive review of methodologies for applying LLMs to graphs, termed LLM4graph. The core of LLM4graph lies in transforming graphs into texts for LLMs to understand and analyze. Thus, we propose a novel taxonomy of LLM4graph methods in the view of the transformation. Specifically, existing methods can be divided into two paradigms: Graph2text and Graph2token, which transform graphs into texts or tokens as the input of LLMs, respectively. We point out four challenges during the transformation to systematically present existing methods in a problem-oriented perspective. 
For practical concerns, we provide a guideline for researchers on selecting appropriate models and LLMs for different graphs and hardware constraints. We also identify five future research directions for LLM4graph. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01124v1-abstract-full').style.display = 'none'; document.getElementById('2501.01124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01037">arXiv:2501.01037</a> <span> [<a href="https://arxiv.org/pdf/2501.01037">pdf</a>, <a href="https://arxiv.org/format/2501.01037">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MSC-Bench: Benchmarking and Analyzing Multi-Sensor Corruption for Driving Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hao%2C+X">Xiaoshuai Hao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanqun Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yuting Zhao</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+Y">Yuheng Ji</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+M">Mengchuan Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Haimei Zhao</a>, <a 
href="/search/cs?searchtype=author&query=Kong%2C+L">Lingdong Kong</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+R">Rong Yin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01037v1-abstract-short" style="display: inline;"> Multi-sensor fusion models play a crucial role in autonomous driving perception, particularly in tasks like 3D object detection and HD map construction. These models provide essential and comprehensive static environmental information for autonomous driving systems. While camera-LiDAR fusion methods have shown promising results by integrating data from both modalities, they often depend on complet… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01037v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01037v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01037v1-abstract-full" style="display: none;"> Multi-sensor fusion models play a crucial role in autonomous driving perception, particularly in tasks like 3D object detection and HD map construction. These models provide essential and comprehensive static environmental information for autonomous driving systems. While camera-LiDAR fusion methods have shown promising results by integrating data from both modalities, they often depend on complete sensor inputs. This reliance can lead to low robustness and potential failures when sensors are corrupted or missing, raising significant safety concerns. To tackle this challenge, we introduce the Multi-Sensor Corruption Benchmark (MSC-Bench), the first comprehensive benchmark aimed at evaluating the robustness of multi-sensor autonomous driving perception models against various sensor corruptions. 
Our benchmark includes 16 combinations of corruption types that disrupt both camera and LiDAR inputs, either individually or concurrently. Extensive evaluations of six 3D object detection models and four HD map construction models reveal substantial performance degradation under adverse weather conditions and sensor failures, underscoring critical safety issues. The benchmark toolkit and affiliated code and model checkpoints have been made publicly accessible. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01037v1-abstract-full').style.display = 'none'; document.getElementById('2501.01037v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00907">arXiv:2501.00907</a> <span> [<a href="https://arxiv.org/pdf/2501.00907">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> U-GIFT: Uncertainty-Guided Firewall for Toxic Speech in Few-Shot Scenario </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+J">Jiaxin Song</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinyu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yihao Wang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yifan Tang</a>, <a 
href="/search/cs?searchtype=author&query=Zhang%2C+R">Ru Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianyi Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gongshen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00907v1-abstract-short" style="display: inline;"> With the widespread use of social media, user-generated content has surged on online platforms. When such content includes hateful, abusive, offensive, or cyberbullying behavior, it is classified as toxic speech, posing a significant threat to the online ecosystem's integrity and safety. While manual content moderation is still prevalent, the overwhelming volume of content and the psychological st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00907v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00907v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00907v1-abstract-full" style="display: none;"> With the widespread use of social media, user-generated content has surged on online platforms. When such content includes hateful, abusive, offensive, or cyberbullying behavior, it is classified as toxic speech, posing a significant threat to the online ecosystem's integrity and safety. While manual content moderation is still prevalent, the overwhelming volume of content and the psychological strain on human moderators underscore the need for automated toxic speech detection. Previously proposed detection methods often rely on large annotated datasets; however, acquiring such datasets is both costly and challenging in practice. 
To address this issue, we propose an uncertainty-guided firewall for toxic speech in few-shot scenarios, U-GIFT, that utilizes self-training to enhance detection performance even when labeled data is limited. Specifically, U-GIFT combines active learning with Bayesian Neural Networks (BNNs) to automatically identify high-quality samples from unlabeled data, prioritizing the selection of pseudo-labels with higher confidence for training based on uncertainty estimates derived from model predictions. Extensive experiments demonstrate that U-GIFT significantly outperforms competitive baselines in few-shot detection scenarios. In the 5-shot setting, it achieves a 14.92\% performance improvement over the basic model. Importantly, U-GIFT is user-friendly and adaptable to various pre-trained language models (PLMs). It also exhibits robust performance in scenarios with sample imbalance and cross-domain settings, while showcasing strong generalization across various language applications. We believe that U-GIFT provides an efficient solution for few-shot toxic speech detection, offering substantial support for automated content moderation in cyberspace, thereby acting as a firewall to promote advancements in cybersecurity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00907v1-abstract-full').style.display = 'none'; document.getElementById('2501.00907v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 6 figures and 10 tables. 
Comments are welcome</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20279">arXiv:2412.20279</a> <span> [<a href="https://arxiv.org/pdf/2412.20279">pdf</a>, <a href="https://arxiv.org/format/2412.20279">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Deep Generalized Schrödinger Bridges: From Image Generation to Solving Mean-Field Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guan-Horng Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tianrong Chen</a>, <a href="/search/cs?searchtype=author&query=Theodorou%2C+E+A">Evangelos A. Theodorou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20279v1-abstract-short" style="display: inline;"> Generalized Schrödinger Bridges (GSBs) are a fundamental mathematical framework used to analyze the most likely particle evolution based on the principle of least action including kinetic and potential energy. 
In parallel to their well-established presence in the theoretical realms of quantum mechanics and optimal transport, this paper focuses on an algorithmic perspective, aiming to enhance pract… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20279v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20279v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20279v1-abstract-full" style="display: none;"> Generalized Schrödinger Bridges (GSBs) are a fundamental mathematical framework used to analyze the most likely particle evolution based on the principle of least action including kinetic and potential energy. In parallel to their well-established presence in the theoretical realms of quantum mechanics and optimal transport, this paper focuses on an algorithmic perspective, aiming to enhance practical usage. Our motivated observation is that transportation problems with the optimality structures delineated by GSBs are pervasive across various scientific domains, such as generative modeling in machine learning, mean-field games in stochastic control, and more. Exploring the intrinsic connection between the mathematical modeling of GSBs and the modern algorithmic characterization therefore presents a crucial, yet untapped, avenue. In this paper, we reinterpret GSBs as probabilistic models and demonstrate that, with a delicate mathematical tool known as the nonlinear Feynman-Kac lemma, rich algorithmic concepts, such as likelihoods, variational gaps, and temporal differences, emerge naturally from the optimality structures of GSBs. The resulting computational framework, driven by deep learning and neural networks, operates in a fully continuous state space (i.e., mesh-free) and satisfies distribution constraints, setting it apart from prior numerical solvers relying on spatial discretization or constraint relaxation. 
We demonstrate the efficacy of our method in generative modeling and mean-field games, highlighting its transformative applications at the intersection of mathematical modeling, stochastic process, control, and machine learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20279v1-abstract-full').style.display = 'none'; document.getElementById('2412.20279v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18886">arXiv:2412.18886</a> <span> [<a href="https://arxiv.org/pdf/2412.18886">pdf</a>, <a href="https://arxiv.org/format/2412.18886">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Training for Graph Neural Networks via Graph Subspace Energy Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Ganlin Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Z">Ziling Liang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaowei Huang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xinping Yi</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18886v1-abstract-short" style="display: inline;"> Despite impressive capability in learning over graph-structured data, graph neural networks (GNN) 
suffer from adversarial topology perturbation in both training and inference phases. While adversarial training has demonstrated remarkable effectiveness in image classification tasks, its suitability for GNN models has been doubted until a recent advance that shifts the focus from transductive to ind… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18886v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18886v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18886v1-abstract-full" style="display: none;"> Despite impressive capability in learning over graph-structured data, graph neural networks (GNN) suffer from adversarial topology perturbation in both training and inference phases. While adversarial training has demonstrated remarkable effectiveness in image classification tasks, its suitability for GNN models has been doubted until a recent advance that shifts the focus from transductive to inductive learning. Still, GNN robustness in the inductive setting is under-explored, and it calls for deeper understanding of GNN adversarial training. To this end, we propose a new concept of graph subspace energy (GSE) -- a generalization of graph energy that measures graph stability -- of the adjacency matrix, as an indicator of GNN robustness against topology perturbations. To further demonstrate the effectiveness of such concept, we propose an adversarial training method with the perturbed graphs generated by maximizing the GSE regularization term, referred to as AT-GSE. To deal with the local and global topology perturbations raised respectively by LRBCD and PRBCD, we employ randomized SVD (RndSVD) and Nystrom low-rank approximation to favor the different aspects of the GSE terms. 
An extensive set of experiments shows that AT-GSE outperforms consistently the state-of-the-art GNN adversarial training methods over different homophily and heterophily datasets in terms of adversarial accuracy, whilst more surprisingly achieving a superior clean accuracy on non-perturbed graphs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18886v1-abstract-full').style.display = 'none'; document.getElementById('2412.18886v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17838">arXiv:2412.17838</a> <span> [<a href="https://arxiv.org/pdf/2412.17838">pdf</a>, <a href="https://arxiv.org/format/2412.17838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Coordinated Power Smoothing Control for Wind Storage Integrated System with Physics-informed Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuyi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Huan Zhao</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuji Cao</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zibin Pan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guolong Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+G">Gaoqi Liang</a>, <a 
href="/search/cs?searchtype=author&query=Zhao%2C+J">Junhua Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17838v1-abstract-short" style="display: inline;"> The Wind Storage Integrated System with Power Smoothing Control (PSC) has emerged as a promising solution to ensure both efficient and reliable wind energy generation. However, existing PSC strategies overlook the intricate interplay and distinct control frequencies between batteries and wind turbines, and lack consideration of wake effect and battery degradation cost. In this paper, a novel coord… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17838v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17838v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17838v1-abstract-full" style="display: none;"> The Wind Storage Integrated System with Power Smoothing Control (PSC) has emerged as a promising solution to ensure both efficient and reliable wind energy generation. However, existing PSC strategies overlook the intricate interplay and distinct control frequencies between batteries and wind turbines, and lack consideration of wake effect and battery degradation cost. In this paper, a novel coordinated control framework with hierarchical levels is devised to address these challenges effectively, which integrates the wake model and battery degradation model. In addition, after reformulating the problem as a Markov decision process, the multi-agent reinforcement learning method is introduced to overcome the bi-level characteristic of the problem. 
Moreover, a Physics-informed Neural Network-assisted Multi-agent Deep Deterministic Policy Gradient (PAMA-DDPG) algorithm is proposed to incorporate the power fluctuation differential equation and expedite the learning process. The effectiveness of the proposed methodology is evaluated through simulations conducted in four distinct scenarios using WindFarmSimulator (WFSim). The results demonstrate that the proposed algorithm facilitates approximately an 11% increase in total profit and a 19% decrease in power fluctuation compared to the traditional methods, thereby addressing the dual objectives of economic efficiency and grid-connected energy reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17838v1-abstract-full').style.display = 'none'; document.getElementById('2412.17838v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16780">arXiv:2412.16780</a> <span> [<a href="https://arxiv.org/pdf/2412.16780">pdf</a>, <a href="https://arxiv.org/format/2412.16780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Forget Vectors at Play: Universal Input Perturbations Driving Machine Unlearning in Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changchang Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ren Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yihua Zhang</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+J">Jinghan Jia</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiancheng Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sijia Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yan Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16780v2-abstract-short" style="display: inline;"> Machine unlearning (MU), which seeks to erase the influence of specific unwanted data from already-trained models, is becoming increasingly vital in model editing, particularly to comply with evolving data regulations like the ``right to be forgotten''. 
Conventional approaches are predominantly model-based, typically requiring retraining or fine-tuning the model's weights to meet unlearning requir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16780v2-abstract-full').style.display = 'inline'; document.getElementById('2412.16780v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16780v2-abstract-full" style="display: none;"> Machine unlearning (MU), which seeks to erase the influence of specific unwanted data from already-trained models, is becoming increasingly vital in model editing, particularly to comply with evolving data regulations like the ``right to be forgotten''. Conventional approaches are predominantly model-based, typically requiring retraining or fine-tuning the model's weights to meet unlearning requirements. In this work, we approach the MU problem from a novel input perturbation-based perspective, where the model weights remain intact throughout the unlearning process. We demonstrate the existence of a proactive input-based unlearning strategy, referred to forget vector, which can be generated as an input-agnostic data perturbation and remains as effective as model-based approximate unlearning approaches. We also explore forget vector arithmetic, whereby multiple class-specific forget vectors are combined through simple operations (e.g., linear combinations) to generate new forget vectors for unseen unlearning tasks, such as forgetting arbitrary subsets across classes. Extensive experiments validate the effectiveness and adaptability of the forget vector, showcasing its competitive performance relative to state-of-the-art model-based methods. Codes are available at https://github.com/Changchangsun/Forget-Vector. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16780v2-abstract-full').style.display = 'none'; document.getElementById('2412.16780v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15251">arXiv:2412.15251</a> <span> [<a href="https://arxiv.org/pdf/2412.15251">pdf</a>, <a href="https://arxiv.org/format/2412.15251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AgentPS: Agentic Process Supervision for Multi-modal Content Quality Assurance through Multi-round QA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gorden Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yu Sun</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+R">Ruixiao Sun</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+X">Xin Dong</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Hongyu Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15251v1-abstract-short" style="display: inline;"> The advanced processing and reasoning capabilities of multimodal large language models (MLLMs) have driven 
substantial progress in vision-language (VL) understanding tasks. However, while effective for tasks governed by straightforward logic, MLLMs often encounter challenges when reasoning over complex, interdependent logic structures. To address this limitation, we introduce \textit{AgentPS}, a n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15251v1-abstract-full').style.display = 'inline'; document.getElementById('2412.15251v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15251v1-abstract-full" style="display: none;"> The advanced processing and reasoning capabilities of multimodal large language models (MLLMs) have driven substantial progress in vision-language (VL) understanding tasks. However, while effective for tasks governed by straightforward logic, MLLMs often encounter challenges when reasoning over complex, interdependent logic structures. To address this limitation, we introduce \textit{AgentPS}, a novel framework that integrates Agentic Process Supervision into MLLMs via multi-round question answering during fine-tuning. \textit{AgentPS} demonstrates significant performance improvements over baseline MLLMs on proprietary TikTok datasets, due to its integration of process supervision and structured sequential reasoning. Furthermore, we show that replacing human-annotated labels with LLM-generated labels retains much of the performance gain, highlighting the framework's practical scalability in industrial applications. These results position \textit{AgentPS} as a highly effective and efficient architecture for multimodal classification tasks. Its adaptability and scalability, especially when enhanced by automated annotation generation, make it a powerful tool for handling large-scale, real-world challenges. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15251v1-abstract-full').style.display = 'none'; document.getElementById('2412.15251v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15122">arXiv:2412.15122</a> <span> [<a href="https://arxiv.org/pdf/2412.15122">pdf</a>, <a href="https://arxiv.org/format/2412.15122">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Solving the all pairs shortest path problem after minor update of a large dense graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gangli Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.15122v6-abstract-short" style="display: inline;"> The all pairs shortest path problem is a fundamental optimization problem in graph theory. We deal with re-calculating the all-pairs shortest path (APSP) matrix after a minor modification of a weighted dense graph, e.g., adding a node, removing a node, or updating an edge. We assume the APSP matrix for the original graph is already known. The graph can be directed or undirected. 
A cold-start calcu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15122v6-abstract-full').style.display = 'inline'; document.getElementById('2412.15122v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.15122v6-abstract-full" style="display: none;"> The all pairs shortest path problem is a fundamental optimization problem in graph theory. We deal with re-calculating the all-pairs shortest path (APSP) matrix after a minor modification of a weighted dense graph, e.g., adding a node, removing a node, or updating an edge. We assume the APSP matrix for the original graph is already known. The graph can be directed or undirected. A cold-start calculation of the new APSP matrix by traditional algorithms, like the Floyd-Warshall algorithm or Dijkstra's algorithm, needs $ O(n^3) $ time. We propose two algorithms for warm-start calculation of the new APSP matrix. The best case complexity for a warm-start calculation is $ O(n^2) $, the worst case complexity is $ O(n^3) $. We implemented the algorithms and tested their performance with experiments. The result shows a warm-start calculation can save a great portion of calculation time, compared with cold-start calculation. In addition, another algorithm is devised to warm-start calculate of the shortest path between two nodes. Experiment shows warm-start calculation can save 99\% of calculation time, compared with cold-start calculation by Dijkstra's algorithm, on directed complete graphs of large sizes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.15122v6-abstract-full').style.display = 'none'; document.getElementById('2412.15122v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14538">arXiv:2412.14538</a> <span> [<a href="https://arxiv.org/pdf/2412.14538">pdf</a>, <a href="https://arxiv.org/format/2412.14538">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Overview of AI and Communication for 6G Network: Fundamentals, Challenges, and Future Research Opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cui%2C+Q">Qimei Cui</a>, <a href="/search/cs?searchtype=author&query=You%2C+X">Xiaohu You</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+N">Ni Wei</a>, <a href="/search/cs?searchtype=author&query=Nan%2C+G">Guoshun Nan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuefei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+X">Xinchen Lyu</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+M">Ming 
Ai</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+X">Xiaofeng Tao</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zhiyong Feng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+M">Meixia Tao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongming Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chongwen Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyi Liu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+C">Chenghui Peng</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhiwen Pan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+T">Tao Sun</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tao Chen</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+M+K">Muhammad Khurram Khan</a>, <a href="/search/cs?searchtype=author&query=Jamalipour%2C+A">Abbas Jamalipour</a>, <a href="/search/cs?searchtype=author&query=Guizani%2C+M">Mohsen Guizani</a>, <a href="/search/cs?searchtype=author&query=Yuen%2C+C">Chau Yuen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14538v4-abstract-short" style="display: inline;"> With the growing demand for seamless connectivity and intelligent communication, the integration of artificial intelligence (AI) and sixth-generation (6G) communication networks has emerged as a transformative paradigm. 
By embedding AI capabilities across various network layers, this integration enables optimized resource allocation, improved efficiency, and enhanced system robust performance, par… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14538v4-abstract-full').style.display = 'inline'; document.getElementById('2412.14538v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14538v4-abstract-full" style="display: none;"> With the growing demand for seamless connectivity and intelligent communication, the integration of artificial intelligence (AI) and sixth-generation (6G) communication networks has emerged as a transformative paradigm. By embedding AI capabilities across various network layers, this integration enables optimized resource allocation, improved efficiency, and enhanced system robust performance, particularly in intricate and dynamic environments. This paper presents a comprehensive overview of AI and communication for 6G networks, with a focus on emphasizing their foundational principles, inherent challenges, and future research opportunities. We first review the integration of AI and communications in the context of 6G, exploring the driving factors behind incorporating AI into wireless communications, as well as the vision for the convergence of AI and 6G. The discourse then transitions to a detailed exposition of the envisioned integration of AI within 6G networks, delineated across three progressive developmental stages. The first stage, AI for Network, focuses on employing AI to augment network performance, optimize efficiency, and enhance user service experiences. The second stage, Network for AI, highlights the role of the network in facilitating and buttressing AI operations and presents key enabling technologies, such as digital twins for AI and semantic communication. 
In the final stage, AI as a Service, it is anticipated that future 6G networks will innately provide AI functions as services, supporting application scenarios like immersive communication and intelligent industrial robots. In addition, we conduct an in-depth analysis of the critical challenges faced by the integration of AI and communications in 6G. Finally, we outline promising future research opportunities that are expected to drive the development and refinement of AI and 6G communications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14538v4-abstract-full').style.display = 'none'; document.getElementById('2412.14538v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12522">arXiv:2412.12522</a> <span> [<a href="https://arxiv.org/pdf/2412.12522">pdf</a>, <a href="https://arxiv.org/format/2412.12522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Solid-SQL: Enhanced Schema-linking based In-context Learning for Robust Text-to-SQL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Geling Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yunzhi Tan</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+R">Ruichao Zhong</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuanzhen Xie</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Lingchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+B">Bo Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12522v1-abstract-short" style="display: inline;"> Recently, large language models (LLMs) have significantly improved the performance of text-to-SQL systems. Nevertheless, many state-of-the-art (SOTA) approaches have overlooked the critical aspect of system robustness. Our experiments reveal that while LLM-driven methods excel on standard datasets, their accuracy is notably compromised when faced with adversarial perturbations. 
To address this cha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12522v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12522v1-abstract-full" style="display: none;"> Recently, large language models (LLMs) have significantly improved the performance of text-to-SQL systems. Nevertheless, many state-of-the-art (SOTA) approaches have overlooked the critical aspect of system robustness. Our experiments reveal that while LLM-driven methods excel on standard datasets, their accuracy is notably compromised when faced with adversarial perturbations. To address this challenge, we propose a robust text-to-SQL solution, called Solid-SQL, designed to integrate with various LLMs. We focus on the pre-processing stage, training a robust schema-linking model enhanced by LLM-based data augmentation. Additionally, we design a two-round, structural similarity-based example retrieval strategy for in-context learning. Our method achieves SOTA SQL execution accuracy levels of 82.1% and 58.9% on the general Spider and Bird benchmarks, respectively. Furthermore, experimental results show that Solid-SQL delivers an average improvement of 11.6% compared to baselines on the perturbed Spider-Syn, Spider-Realistic, and Dr. Spider benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12522v1-abstract-full').style.display = 'none'; document.getElementById('2412.12522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at COLING 2025 Main</span> </p> </li> </ol>
<!-- Pagination: only the link for the page being displayed (page 1, marked is-current) carries aria-current="page"; every other page link is labelled "Goto page N" and has no aria-current. -->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&query=Liu%2C+G&start=50" class="pagination-next" >Next </a>
  <ul class="pagination-list">
    <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Liu%2C+G&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li>
    <li><span class="pagination-ellipsis">…</span></li>
  </ul>
</nav>
<div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" 
aria-hidden="true" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" aria-hidden="true" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <!-- The links in this item open in a new tab; rel="noopener" keeps the opened page from reaching this one through window.opener. The icons are decorative (the adjacent text names each link), so they are hidden from assistive technology. --> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" aria-hidden="true" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 
0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" aria-hidden="true" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" aria-hidden="true" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 
21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>