Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 58 results for author: <span class="mathjax">Mi, H</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. 
<a href="/search/?searchtype=author&query=Mi%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Mi, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Mi%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div 
class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Mi, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Mi%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Mi%2C+H&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Mi%2C+H&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16852">arXiv:2502.16852</a> <span> [<a href="https://arxiv.org/pdf/2502.16852">pdf</a>, <a href="https://arxiv.org/format/2502.16852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improving LLM General Preference Alignment via Optimistic Online 
Mirror Descent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+T">Tao Ge</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16852v1-abstract-short" style="display: inline;"> Reinforcement learning from human feedback (RLHF) has demonstrated remarkable effectiveness in aligning large language models (LLMs) with human preferences. Many existing alignment approaches rely on the Bradley-Terry (BT) model assumption, which assumes the existence of a ground-truth reward for each prompt-response pair. However, this assumption can be overly restrictive when modeling complex hu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16852v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16852v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16852v1-abstract-full" style="display: none;"> Reinforcement learning from human feedback (RLHF) has demonstrated remarkable effectiveness in aligning large language models (LLMs) with human preferences. Many existing alignment approaches rely on the Bradley-Terry (BT) model assumption, which assumes the existence of a ground-truth reward for each prompt-response pair. 
However, this assumption can be overly restrictive when modeling complex human preferences. In this paper, we drop the BT model assumption and study LLM alignment under general preferences, formulated as a two-player game. Drawing on theoretical insights from learning in games, we integrate optimistic online mirror descent into our alignment framework to approximate the Nash policy. Theoretically, we demonstrate that our approach achieves an $O(T^{-1})$ bound on the duality gap, improving upon the previous $O(T^{-1/2})$ result. More importantly, we implement our method and show through experiments that it outperforms state-of-the-art RLHF algorithms across multiple representative benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16852v1-abstract-full').style.display = 'none'; document.getElementById('2502.16852v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11183">arXiv:2502.11183</a> <span> [<a href="https://arxiv.org/pdf/2502.11183">pdf</a>, <a href="https://arxiv.org/format/2502.11183">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Don't Get Lost in the Trees: Streamlining LLM Reasoning by Overcoming Tree Search Exploration Pitfalls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Ante Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+X">Xiangyu Duan</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Z">Zhaopeng Tu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jinsong Su</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11183v1-abstract-short" style="display: inline;"> Recent advancements in tree search algorithms guided by verifiers have significantly enhanced the reasoning capabilities of large language models (LLMs), but at the cost of increased computational resources. 
In this work, we identify two key challenges contributing to this inefficiency: $\textit{over-exploration}$ due to redundant states with semantically equivalent content, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11183v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11183v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11183v1-abstract-full" style="display: none;"> Recent advancements in tree search algorithms guided by verifiers have significantly enhanced the reasoning capabilities of large language models (LLMs), but at the cost of increased computational resources. In this work, we identify two key challenges contributing to this inefficiency: $\textit{over-exploration}$ due to redundant states with semantically equivalent content, and $\textit{under-exploration}$ caused by high variance in verifier scoring leading to frequent trajectory switching. To address these issues, we propose FETCH, an e$\textbf{f}$fici$\textbf{e}$nt $\textbf{t}$ree sear$\textbf{ch}$ framework, which is a flexible, plug-and-play system compatible with various tree search algorithms. Our framework mitigates over-exploration by merging semantically similar states using agglomerative clustering of text embeddings obtained from a fine-tuned SimCSE model. To tackle under-exploration, we enhance verifiers by incorporating temporal difference learning with adjusted $\lambda$-returns during training to reduce variance, and employing a verifier ensemble to aggregate scores during inference. Experiments on GSM8K, GSM-Plus, and MATH datasets demonstrate that our methods significantly improve reasoning accuracy and computational efficiency across four different tree search algorithms, paving the way for more practical applications of LLM-based reasoning. The code will be released upon acceptance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11183v1-abstract-full').style.display = 'none'; document.getElementById('2502.11183v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18585">arXiv:2501.18585</a> <span> [<a href="https://arxiv.org/pdf/2501.18585">pdf</a>, <a href="https://arxiv.org/format/2501.18585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Thoughts Are All Over the Place: On the Underthinking of o1-Like LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qiuzhi Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiahao Xu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+T">Tian Liang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xingyu Chen</a>, <a href="/search/cs?searchtype=author&query=He%2C+Z">Zhiwei He</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Juntao Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Z">Zhaopeng Tu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a 
href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18585v2-abstract-short" style="display: inline;"> Large language models (LLMs) such as OpenAI's o1 have demonstrated remarkable abilities in complex reasoning tasks by scaling test-time compute and exhibiting human-like deep thinking. However, we identify a phenomenon we term underthinking, where o1-like LLMs frequently switch between different reasoning thoughts without sufficiently exploring promising paths to reach a correct solution. This beh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18585v2-abstract-full').style.display = 'inline'; document.getElementById('2501.18585v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18585v2-abstract-full" style="display: none;"> Large language models (LLMs) such as OpenAI's o1 have demonstrated remarkable abilities in complex reasoning tasks by scaling test-time compute and exhibiting human-like deep thinking. However, we identify a phenomenon we term underthinking, where o1-like LLMs frequently switch between different reasoning thoughts without sufficiently exploring promising paths to reach a correct solution. This behavior leads to inadequate depth of reasoning and decreased performance, particularly on challenging mathematical problems. To systematically analyze this issue, we conduct experiments on three challenging test sets and two representative open-source o1-like models, revealing that frequent thought switching correlates with incorrect responses. We introduce a novel metric to quantify underthinking by measuring token efficiency in incorrect answers. 
To address underthinking, we propose a decoding strategy with thought switching penalty TIP that discourages premature transitions between thoughts, encouraging deeper exploration of each reasoning path. Experimental results demonstrate that our approach improves accuracy across challenging datasets without requiring model fine-tuning. Our findings contribute to understanding reasoning inefficiencies in o1-like LLMs and offer a practical solution to enhance their problem-solving capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18585v2-abstract-full').style.display = 'none'; document.getElementById('2501.18585v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">1. We have updated the results for DeepSeek-R1, and all of our original conclusions remain valid. 2. Our proposed Tip approach remains effective in Best-of-N scenarios (e.g., self-consistency and Laconic Decoding) when built on DeepSeek-R1</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.21187">arXiv:2412.21187</a> <span> [<a href="https://arxiv.org/pdf/2412.21187">pdf</a>, <a href="https://arxiv.org/format/2412.21187">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Do NOT Think That Much for 2+3=? 
On the Overthinking of o1-Like LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xingyu Chen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiahao Xu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+T">Tian Liang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Z">Zhiwei He</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jianhui Pang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qiuzhi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mengfei Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuosheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Z">Zhaopeng Tu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.21187v2-abstract-short" style="display: inline;"> The remarkable performance of models like the OpenAI o1 can be attributed to their ability to emulate human-like long-time thinking during inference. These models employ extended chain-of-thought (CoT) processes, exploring multiple strategies to enhance problem-solving capabilities. 
However, a critical question remains: How to intelligently and efficiently scale computational resources during test… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.21187v2-abstract-full').style.display = 'inline'; document.getElementById('2412.21187v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.21187v2-abstract-full" style="display: none;"> The remarkable performance of models like the OpenAI o1 can be attributed to their ability to emulate human-like long-time thinking during inference. These models employ extended chain-of-thought (CoT) processes, exploring multiple strategies to enhance problem-solving capabilities. However, a critical question remains: How to intelligently and efficiently scale computational resources during testing. This paper presents the first comprehensive study on the prevalent issue of overthinking in these models, where excessive computational resources are allocated for simple problems with minimal benefit. We introduce novel efficiency metrics from both outcome and process perspectives to evaluate the rational use of computational resources by o1-like models. Using a self-training paradigm, we propose strategies to mitigate overthinking, streamlining reasoning processes without compromising accuracy. Experimental results show that our approach successfully reduces computational overhead while preserving model performance across a range of testsets with varying difficulty levels, such as GSM8K, MATH500, GPQA, and AIME. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.21187v2-abstract-full').style.display = 'none'; document.getElementById('2412.21187v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We have updated the results of DeepSeek-R1, and all conclusions still hold</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20735">arXiv:2412.20735</a> <span> [<a href="https://arxiv.org/pdf/2412.20735">pdf</a>, <a href="https://arxiv.org/format/2412.20735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> HUNYUANPROVER: A Scalable Data Synthesis Framework and Guided Tree Search for Automated Theorem Proving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&query=Du%2C+D">Dong Du</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chen Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weikang Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tao Yang</a>, <a 
href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20735v2-abstract-short" style="display: inline;"> We introduce HunyuanProver, a language model finetuned from the Hunyuan 7B for interactive automatic theorem proving with LEAN4. To alleviate the data sparsity issue, we design a scalable framework to iteratively synthesize data with low cost. Besides, guided tree search algorithms are designed to enable effective “system 2 thinking” of the prover. HunyuanProver achieves state-of-the-art (SOTA) p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20735v2-abstract-full').style.display = 'inline'; document.getElementById('2412.20735v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20735v2-abstract-full" style="display: none;"> We introduce HunyuanProver, a language model finetuned from the Hunyuan 7B for interactive automatic theorem proving with LEAN4. To alleviate the data sparsity issue, we design a scalable framework to iteratively synthesize data with low cost. Besides, guided tree search algorithms are designed to enable effective “system 2 thinking” of the prover. HunyuanProver achieves state-of-the-art (SOTA) performances on major benchmarks. Specifically, it achieves a pass of 68.4% on the miniF2F-test compared to 65.9%, the current SOTA results. It proves 4 IMO statements (imo_1960_p2, imo_1962_p2, imo_1964_p2 and imo_1983_p6) in miniF2F-test. To benefit the community, we will open-source a dataset of 30k synthesized instances, where each instance contains the original question in natural language, the converted statement by autoformalization, and the proof by HunyuanProver. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20735v2-abstract-full').style.display = 'none'; document.getElementById('2412.20735v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18819">arXiv:2412.18819</a> <span> [<a href="https://arxiv.org/pdf/2412.18819">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LLM-assisted Vector Similarity Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Riyadh%2C+M">Md Riyadh</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Muqi Li</a>, <a href="/search/cs?searchtype=author&query=Lie%2C+F+H">Felix Haryanto Lie</a>, <a href="/search/cs?searchtype=author&query=Loh%2C+J+L">Jia Long Loh</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haotian Mi</a>, <a href="/search/cs?searchtype=author&query=Bohra%2C+S">Sayam Bohra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18819v2-abstract-short" style="display: inline;"> As data retrieval demands become 
increasingly complex, traditional search methods often fall short in addressing nuanced and conceptual queries. Vector similarity search has emerged as a promising technique for finding semantically similar information efficiently. However, its effectiveness diminishes when handling intricate queries with contextual nuances. This paper explores a hybrid approach co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18819v2-abstract-full').style.display = 'inline'; document.getElementById('2412.18819v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18819v2-abstract-full" style="display: none;"> As data retrieval demands become increasingly complex, traditional search methods often fall short in addressing nuanced and conceptual queries. Vector similarity search has emerged as a promising technique for finding semantically similar information efficiently. However, its effectiveness diminishes when handling intricate queries with contextual nuances. This paper explores a hybrid approach combining vector similarity search with Large Language Models (LLMs) to enhance search accuracy and relevance. The proposed two-step solution first employs vector similarity search to shortlist potential matches, followed by an LLM for context-aware ranking of the results. Experiments on structured datasets demonstrate that while vector similarity search alone performs well for straightforward queries, the LLM-assisted approach excels in processing complex queries involving constraints, negations, or conceptual requirements. By leveraging the natural language understanding capabilities of LLMs, this method improves the accuracy of search results for complex tasks without sacrificing efficiency. We also discuss real-world applications and propose directions for future research to refine and scale this technique for diverse datasets and use cases. 
Original article: https://engineering.grab.com/llm-assisted-vector-similarity-search <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18819v2-abstract-full').style.display = 'none'; document.getElementById('2412.18819v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16871">arXiv:2412.16871</a> <span> [<a href="https://arxiv.org/pdf/2412.16871">pdf</a>, <a href="https://arxiv.org/format/2412.16871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Teaching LLMs to Refine with Tools </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiahao Xu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+T">Tian Liang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Z">Zhaopeng Tu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16871v1-abstract-short" style="display: 
inline;"> Large language models (LLMs) can refine their responses based on feedback, enabling self-improvement through iterative training or test-time refinement. However, existing methods predominantly focus on refinement within the same reasoning format, which may lead to non-correcting behaviors. We propose CaP, a novel approach that uses external tools to refine chain-of-thought (CoT) responses generate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16871v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16871v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16871v1-abstract-full" style="display: none;"> Large language models (LLMs) can refine their responses based on feedback, enabling self-improvement through iterative training or test-time refinement. However, existing methods predominantly focus on refinement within the same reasoning format, which may lead to non-correcting behaviors. We propose CaP, a novel approach that uses external tools to refine chain-of-thought (CoT) responses generated by the same or other LLMs. CaP employs a two-stage training process: supervised fine-tuning followed by preference optimization with DPO variants. Our observations highlight the critical role of preference optimization in enabling effective refinement. Additionally, we compare several sampling strategies to leverage CoT and tools at inference time. Experimental results demonstrate CaP's potential for effective cross-reasoning refinement and efficient inference. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16871v1-abstract-full').style.display = 'none'; document.getElementById('2412.16871v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06720">arXiv:2412.06720</a> <span> [<a href="https://arxiv.org/pdf/2412.06720">pdf</a>, <a href="https://arxiv.org/format/2412.06720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> VP-MEL: Visual Prompts Guided Multimodal Entity Linking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mi%2C+H">Hongze Mi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jinyuan Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuying Zhang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+H">Haoran Cheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiahao Wang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+D">Di Sun</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+G">Gang Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06720v4-abstract-short" style="display: inline;"> Multimodal entity linking (MEL), a task aimed at linking mentions within multimodal 
contexts to their corresponding entities in a knowledge base (KB), has attracted much attention due to its wide applications in recent years. However, existing MEL methods often rely on mention words as retrieval cues, which limits their ability to effectively utilize information from both images and text. This rel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06720v4-abstract-full').style.display = 'inline'; document.getElementById('2412.06720v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06720v4-abstract-full" style="display: none;"> Multimodal entity linking (MEL), a task aimed at linking mentions within multimodal contexts to their corresponding entities in a knowledge base (KB), has attracted much attention due to its wide applications in recent years. However, existing MEL methods often rely on mention words as retrieval cues, which limits their ability to effectively utilize information from both images and text. This reliance causes MEL to struggle with accurately retrieving entities in certain scenarios, especially when the focus is on image objects or mention words are missing from the text. To solve these issues, we introduce a Visual Prompts guided Multimodal Entity Linking (VP-MEL) task. Given a text-image pair, VP-MEL aims to link a marked region (i.e., visual prompt) in an image to its corresponding entities in the knowledge base. To facilitate this task, we present a new dataset, VPWiki, specifically designed for VP-MEL. Furthermore, we propose a framework named IIER, which enhances visual feature extraction using visual prompts and leverages the pretrained Detective-VLM model to capture latent information. Experimental results on the VPWiki dataset demonstrate that IIER outperforms baseline methods across multiple benchmarks for the VP-MEL task. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06720v4-abstract-full').style.display = 'none'; document.getElementById('2412.06720v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17691">arXiv:2411.17691</a> <span> [<a href="https://arxiv.org/pdf/2411.17691">pdf</a>, <a href="https://arxiv.org/format/2411.17691">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Low-Bit Quantization Favors Undertrained LLMs: Scaling Laws for Quantized LLMs with 100T Training Tokens </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ouyang%2C+X">Xu Ouyang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+T">Tao Ge</a>, <a href="/search/cs?searchtype=author&query=Hartvigsen%2C+T">Thomas Hartvigsen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhisong Zhang</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17691v2-abstract-short" style="display: inline;"> We reveal that low-bit 
quantization favors undertrained large language models (LLMs) by observing that models with larger sizes or fewer training tokens experience less quantization-induced degradation (QiD) when applying low-bit quantization, whereas smaller models with extensive training tokens suffer significant QiD. To gain deeper insights into this trend, we study over 1500 quantized LLM chec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17691v2-abstract-full').style.display = 'inline'; document.getElementById('2411.17691v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17691v2-abstract-full" style="display: none;"> We reveal that low-bit quantization favors undertrained large language models (LLMs) by observing that models with larger sizes or fewer training tokens experience less quantization-induced degradation (QiD) when applying low-bit quantization, whereas smaller models with extensive training tokens suffer significant QiD. To gain deeper insights into this trend, we study over 1500 quantized LLM checkpoints of various sizes and at different training levels (undertrained or fully trained) in a controlled setting, deriving scaling laws for understanding the relationship between QiD and factors such as the number of training tokens, model size and bit width. With the derived scaling laws, we propose a novel perspective that we can use QiD to measure an LLM's training levels and determine the number of training tokens required for fully training LLMs of various sizes. Moreover, we use the scaling laws to predict the quantization performance of different-sized LLMs trained with 100 trillion tokens. Our projection shows that the low-bit quantization performance of future models, which are expected to be trained with over 100 trillion tokens, may NOT be desirable. 
This poses a potential challenge for low-bit quantization in the future and highlights the need for awareness of a model's training level when evaluating low-bit quantization research. To facilitate future research on this problem, we release all the 1500+ quantized checkpoints used in this work at https://huggingface.co/Xu-Ouyang. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17691v2-abstract-full').style.display = 'none'; document.getElementById('2411.17691v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06508">arXiv:2410.06508</a> <span> [<a href="https://arxiv.org/pdf/2410.06508">pdf</a>, <a href="https://arxiv.org/format/2410.06508">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Towards Self-Improvement of LLMs via MCTS: Leveraging Stepwise Knowledge with Curriculum Preference Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiyao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a 
href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Furong Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06508v1-abstract-short" style="display: inline;"> Monte Carlo Tree Search (MCTS) has recently emerged as a powerful technique for enhancing the reasoning capabilities of LLMs. Techniques such as SFT or DPO have enabled LLMs to distill high-quality behaviors from MCTS, improving their reasoning performance. However, existing distillation methods underutilize the rich trajectory information generated by MCTS, limiting the potential for improvements… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06508v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06508v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06508v1-abstract-full" style="display: none;"> Monte Carlo Tree Search (MCTS) has recently emerged as a powerful technique for enhancing the reasoning capabilities of LLMs. Techniques such as SFT or DPO have enabled LLMs to distill high-quality behaviors from MCTS, improving their reasoning performance. However, existing distillation methods underutilize the rich trajectory information generated by MCTS, limiting the potential for improvements in LLM reasoning. In this paper, we propose AlphaLLM-CPL, a novel pairwise training framework that enables LLMs to self-improve through MCTS behavior distillation. 
AlphaLLM-CPL efficiently leverages MCTS trajectories via two key innovations: (1) AlphaLLM-CPL constructs stepwise trajectory pairs from child nodes sharing the same parent in the search tree, providing step-level information for more effective MCTS behavior distillation. (2) AlphaLLM-CPL introduces curriculum preference learning, dynamically adjusting the training sequence of trajectory pairs in each offline training epoch to prioritize critical learning steps and mitigate overfitting. Experimental results on mathematical reasoning tasks demonstrate that AlphaLLM-CPL significantly outperforms previous MCTS behavior distillation methods, substantially boosting the reasoning capabilities of LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06508v1-abstract-full').style.display = 'none'; document.getElementById('2410.06508v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03864">arXiv:2410.03864</a> <span> [<a href="https://arxiv.org/pdf/2410.03864">pdf</a>, <a href="https://arxiv.org/format/2410.03864">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DOTS: Learning to Reason Dynamically in LLMs via Optimal Reasoning Trajectories Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yue%2C+M">Murong Yue</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+W">Wenlin Yao</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Z">Ziyu Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03864v1-abstract-short" style="display: inline;"> Enhancing the capability of large language models (LLMs) in reasoning has gained significant attention in recent years. Previous studies have demonstrated the effectiveness of various prompting strategies in aiding LLMs in reasoning (called "reasoning actions"), such as step-by-step thinking, reflecting before answering, solving with programs, and their combinations. 
However, these approaches ofte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03864v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03864v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03864v1-abstract-full" style="display: none;"> Enhancing the capability of large language models (LLMs) in reasoning has gained significant attention in recent years. Previous studies have demonstrated the effectiveness of various prompting strategies in aiding LLMs in reasoning (called "reasoning actions"), such as step-by-step thinking, reflecting before answering, solving with programs, and their combinations. However, these approaches often applied static, predefined reasoning actions uniformly to all questions, without considering the specific characteristics of each question or the capability of the task-solving LLM. In this paper, we propose DOTS, an approach enabling LLMs to reason dynamically via optimal reasoning trajectory search, tailored to the specific characteristics of each question and the inherent capability of the task-solving LLM. Our approach involves three key steps: i) defining atomic reasoning action modules that can be composed into various reasoning action trajectories; ii) searching for the optimal action trajectory for each training question through iterative exploration and evaluation for the specific task-solving LLM; and iii) using the collected optimal trajectories to train an LLM to plan for the reasoning trajectories of unseen questions. In particular, we propose two learning paradigms, i.e., fine-tuning an external LLM as a planner to guide the task-solving LLM, or directly fine-tuning the task-solving LLM with an internalized capability for reasoning actions planning. 
Our experiments across eight reasoning tasks show that our method consistently outperforms static reasoning techniques and the vanilla instruction tuning approach. Further analysis reveals that our method enables LLMs to adjust their computation based on problem complexity, allocating deeper thinking and reasoning to harder problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03864v1-abstract-full').style.display = 'none'; document.getElementById('2410.03864v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17433">arXiv:2409.17433</a> <span> [<a href="https://arxiv.org/pdf/2409.17433">pdf</a>, <a href="https://arxiv.org/format/2409.17433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HDFlow: Enhancing LLM Complex Problem-Solving with Hybrid Thinking and Dynamic Workflows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+W">Wenlin Yao</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17433v1-abstract-short" style="display: inline;"> Despite recent advancements 
in large language models (LLMs), their performance on complex reasoning problems requiring multi-step thinking and combining various skills is still limited. To address this, we propose a novel framework HDFlow for complex reasoning with LLMs that combines fast and slow thinking modes in an adaptive manner. Our approach consists of two key components: 1) a new approach… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17433v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17433v1-abstract-full" style="display: none;"> Despite recent advancements in large language models (LLMs), their performance on complex reasoning problems requiring multi-step thinking and combining various skills is still limited. To address this, we propose a novel framework HDFlow for complex reasoning with LLMs that combines fast and slow thinking modes in an adaptive manner. Our approach consists of two key components: 1) a new approach for slow, deliberate reasoning called Dynamic Workflow, which automatically decomposes complex problems into more manageable sub-tasks and dynamically designs a workflow to assemble specialized LLM or symbolic reasoning tools to solve sub-tasks; 2) Hybrid Thinking, a general framework that dynamically combines fast and slow thinking based on problem complexity. Finally, we propose an easy-to-scale method for automatically synthesizing a large-scale dataset of 27K challenging reasoning problems for complex reasoning and a hybrid thinking tuning method that trains smaller LLMs on this dataset to internalize the fast/slow hybrid reasoning strategies. 
Experiments on four reasoning benchmark datasets demonstrate that our slow thinking with dynamic workflows significantly outperforms Chain-of-Thought, and hybrid thinking achieves the highest accuracy while providing an effective balance between computational efficiency and performance. Fine-tuning using our hybrid thinking approach also significantly boosts the complex reasoning capabilities of open-source language models. The results showcase the promise of slow thinking, dynamic workflows, and hybrid thinking in expanding the frontier of complex problem-solving with LLMs. Code and data will be released at https://github.com/wenlinyao/HDFlow. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17433v1-abstract-full').style.display = 'none'; document.getElementById('2409.17433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02123">arXiv:2409.02123</a> <span> [<a href="https://arxiv.org/pdf/2409.02123">pdf</a>, <a href="https://arxiv.org/format/2409.02123">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> PuYun: Medium-Range Global Weather Forecasting Using Large Kernel Attention Convolutional Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+S">Shengchen Zhu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yiming Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P">Peiying Yu</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+X">Xiang Qu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuxiao Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yiming Ma</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhizhan Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yukai Liu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Hao Mi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02123v2-abstract-short" style="display: inline;"> Accurate weather 
forecasting is essential for understanding and mitigating weather-related impacts. In this paper, we present PuYun, an autoregressive cascade model that leverages large kernel attention convolutional networks. The model's design inherently supports extended weather prediction horizons while broadening the effective receptive field. The integration of large kernel attention mechani… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02123v2-abstract-full').style.display = 'inline'; document.getElementById('2409.02123v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02123v2-abstract-full" style="display: none;"> Accurate weather forecasting is essential for understanding and mitigating weather-related impacts. In this paper, we present PuYun, an autoregressive cascade model that leverages large kernel attention convolutional networks. The model's design inherently supports extended weather prediction horizons while broadening the effective receptive field. The integration of large kernel attention mechanisms within the convolutional layers enhances the model's capacity to capture fine-grained spatial details, thereby improving its predictive accuracy for meteorological phenomena. We introduce PuYun, comprising PuYun-Short for 0-5 day forecasts and PuYun-Medium for 5-10 day predictions. This approach enhances the accuracy of 10-day weather forecasting. Through evaluation, we demonstrate that PuYun-Short alone surpasses the performance of both GraphCast and FuXi-Short in generating accurate 10-day forecasts. Specifically, on the 10th day, PuYun-Short reduces the RMSE for Z500 to 720 $m^2/s^2$, compared to 732 $m^2/s^2$ for GraphCast and 740 $m^2/s^2$ for FuXi-Short. Additionally, the RMSE for T2M is reduced to 2.60 K, compared to 2.63 K for GraphCast and 2.65 K for FuXi-Short. 
Furthermore, when employing a cascaded approach by integrating PuYun-Short and PuYun-Medium, our method achieves superior results compared to the combined performance of FuXi-Short and FuXi-Medium. On the 10th day, the RMSE for Z500 is further reduced to 638 $m^2/s^2$, compared to 641 $m^2/s^2$ for FuXi. These findings underscore the effectiveness of our model ensemble in advancing medium-range weather prediction. Our training code and model will be open-sourced. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02123v2-abstract-full').style.display = 'none'; document.getElementById('2409.02123v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15565">arXiv:2408.15565</a> <span> [<a href="https://arxiv.org/pdf/2408.15565">pdf</a>, <a href="https://arxiv.org/format/2408.15565">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SIaM: Self-Improving Code-Assisted Mathematical Reasoning of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15565v1-abstract-short" style="display: inline;"> There is a growing trend of teaching large language models (LLMs) to solve mathematical problems through coding. Existing studies primarily focus on prompting powerful, closed-source models to generate seed training data followed by in-domain data augmentation, equipping LLMs with considerable capabilities for code-aided mathematical reasoning. 
However, continually training these models on augment… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15565v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15565v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15565v1-abstract-full" style="display: none;"> There is a growing trend of teaching large language models (LLMs) to solve mathematical problems through coding. Existing studies primarily focus on prompting powerful, closed-source models to generate seed training data followed by in-domain data augmentation, equipping LLMs with considerable capabilities for code-aided mathematical reasoning. However, continually training these models on augmented data derived from a few datasets such as GSM8K may impair their generalization abilities and restrict their effectiveness to a narrow range of question types. Conversely, the potential of improving such LLMs by leveraging large-scale, expert-written, diverse math question-answer pairs remains unexplored. To utilize these resources and tackle unique challenges such as code response assessment, we propose a novel paradigm that uses a code-based critic model to guide steps including question-code data construction, quality control, and complementary evaluation. We also explore different alignment algorithms with self-generated instruction/preference data to foster continuous improvement. Experiments across both in-domain (up to +5.7%) and out-of-domain (+4.4%) benchmarks in English and Chinese demonstrate the effectiveness of the proposed paradigm. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15565v1-abstract-full').style.display = 'none'; document.getElementById('2408.15565v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06601">arXiv:2408.06601</a> <span> [<a href="https://arxiv.org/pdf/2408.06601">pdf</a>, <a href="https://arxiv.org/format/2408.06601">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> HiRegEx: Interactive Visual Query and Exploration of Multivariate Hierarchical Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+G">Guozheng Li</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haotian Mi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C+H">Chi Harold Liu</a>, <a href="/search/cs?searchtype=author&query=Itoh%2C+T">Takayuki Itoh</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guoren Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06601v1-abstract-short" style="display: inline;"> When using exploratory visual analysis to examine multivariate hierarchical data, users often need to query data to narrow down the scope of analysis. 
However, formulating effective query expressions remains a challenge for multivariate hierarchical data, particularly when datasets become very large. To address this issue, we develop a declarative grammar, HiRegEx (Hierarchical data Regular Expres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06601v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06601v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06601v1-abstract-full" style="display: none;"> When using exploratory visual analysis to examine multivariate hierarchical data, users often need to query data to narrow down the scope of analysis. However, formulating effective query expressions remains a challenge for multivariate hierarchical data, particularly when datasets become very large. To address this issue, we develop a declarative grammar, HiRegEx (Hierarchical data Regular Expression), for querying and exploring multivariate hierarchical data. Rooted in the extended multi-level task topology framework for tree visualizations (e-MLTT), HiRegEx delineates three query targets (node, path, and subtree) and two aspects for querying these targets (features and positions), and uses operators developed based on classical regular expressions for query construction. Based on the HiRegEx grammar, we develop an exploratory framework for querying and exploring multivariate hierarchical data and integrate it into the TreeQueryER prototype system. The exploratory framework includes three major components: top-down pattern specification, bottom-up data-driven inquiry, and context-creation data overview. We validate the expressiveness of HiRegEx with the tasks from the e-MLTT framework and showcase the utility and effectiveness of TreeQueryER system through a case study involving expert users in the analysis of a citation tree dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06601v1-abstract-full').style.display = 'none'; document.getElementById('2408.06601v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures, accepted at IEEE VIS 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 65D18 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.3.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17086">arXiv:2407.17086</a> <span> [<a href="https://arxiv.org/pdf/2407.17086">pdf</a>, <a href="https://arxiv.org/format/2407.17086">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> AI-Gadget Kit: Integrating Swarm User Interfaces with LLM-driven Agents for Rich Tabletop Game Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yijie Guo</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhenhan Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruhan Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Z">Zhihao Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+T">Tianyu Yu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhiling Xu</a>, <a 
href="/search/cs?searchtype=author&query=Zhao%2C+X">Xinyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xueqing Li</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haipeng Mi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17086v1-abstract-short" style="display: inline;"> While Swarm User Interfaces (SUIs) have succeeded in enriching tangible interaction experiences, their limitations in autonomous action planning have hindered the potential for personalized and dynamic interaction generation in tabletop games. Based on the AI-Gadget Kit we developed, this paper explores how to integrate LLM-driven agents within tabletop games to enable SUIs to execute complex inte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17086v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17086v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17086v1-abstract-full" style="display: none;"> While Swarm User Interfaces (SUIs) have succeeded in enriching tangible interaction experiences, their limitations in autonomous action planning have hindered the potential for personalized and dynamic interaction generation in tabletop games. Based on the AI-Gadget Kit we developed, this paper explores how to integrate LLM-driven agents within tabletop games to enable SUIs to execute complex interaction tasks. After defining the design space of this kit, we elucidate the method for designing agents that can extend the meta-actions of SUIs to complex motion planning. Furthermore, we introduce an add-on prompt method that simplifies the design process for four interaction behaviors and four interaction relationships in tabletop games. 
Lastly, we present several application scenarios that illustrate the potential of AI-Gadget Kit to construct personalized interaction in SUI tabletop games. We expect to use our work as a case study to inspire research on multi-agent-driven SUI for other scenarios with complex interaction tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17086v1-abstract-full').style.display = 'none'; document.getElementById('2407.17086v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00617">arXiv:2407.00617</a> <span> [<a href="https://arxiv.org/pdf/2407.00617">pdf</a>, <a href="https://arxiv.org/format/2407.00617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Iterative Nash Policy Optimization: Aligning LLMs with General Preferences via No-Regret Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a 
href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+M">Mingyue Huo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00617v3-abstract-short" style="display: inline;"> Reinforcement Learning with Human Feedback (RLHF) has achieved great success in aligning large language models (LLMs) with human preferences. Prevalent RLHF approaches are reward-based, following the Bradley-Terry (BT) model assumption, which may not fully capture the complexity of human preferences. In this paper, we explore RLHF under a general preference framework and approach it from a game-th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00617v3-abstract-full').style.display = 'inline'; document.getElementById('2407.00617v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00617v3-abstract-full" style="display: none;"> Reinforcement Learning with Human Feedback (RLHF) has achieved great success in aligning large language models (LLMs) with human preferences. Prevalent RLHF approaches are reward-based, following the Bradley-Terry (BT) model assumption, which may not fully capture the complexity of human preferences. In this paper, we explore RLHF under a general preference framework and approach it from a game-theoretic perspective. Specifically, we formulate the problem as a two-player game and propose a novel online algorithm, iterative Nash policy optimization (INPO). 
The key idea is to let the policy play against itself via no-regret learning, thereby approximating the Nash policy. Unlike previous methods, INPO bypasses the need for estimating the expected win rate for individual responses, which typically incurs high computational or annotation costs. Instead, we introduce a new loss objective that is directly minimized over a preference dataset. We provide theoretical analysis for our approach and demonstrate its effectiveness through experiments on various representative benchmarks. With an LLaMA-3-8B-based SFT model, INPO achieves a 42.6% length-controlled win rate on AlpacaEval 2.0 and a 37.8% win rate on Arena-Hard, showing substantial improvement over the state-of-the-art online RLHF algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00617v3-abstract-full').style.display = 'none'; document.getElementById('2407.00617v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00320">arXiv:2407.00320</a> <span> [<a href="https://arxiv.org/pdf/2407.00320">pdf</a>, <a href="https://arxiv.org/format/2407.00320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LiteSearch: Efficacious Tree Search for LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Ante Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jinsong Su</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00320v1-abstract-short" style="display: inline;"> Recent research suggests that tree search algorithms (e.g. Monte Carlo Tree Search) can dramatically boost LLM performance on complex mathematical reasoning tasks. However, they often require more than 10 times the computational resources of greedy decoding due to wasteful search strategies, making them difficult to be deployed in practical applications. 
This study introduces a novel guided tree s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00320v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00320v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00320v1-abstract-full" style="display: none;"> Recent research suggests that tree search algorithms (e.g. Monte Carlo Tree Search) can dramatically boost LLM performance on complex mathematical reasoning tasks. However, they often require more than 10 times the computational resources of greedy decoding due to wasteful search strategies, making them difficult to be deployed in practical applications. This study introduces a novel guided tree search algorithm with dynamic node selection and node-level exploration budget (maximum number of children) calculation to tackle this issue. By considering the search progress towards the final answer (history) and the guidance from a value network (future) trained without any step-wise annotations, our algorithm iteratively selects the most promising tree node before expanding it within the boundaries of the allocated computational budget. Experiments conducted on the GSM8K and TabMWP datasets demonstrate that our approach not only offers competitive performance but also enjoys significantly lower computational costs compared to baseline methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00320v1-abstract-full').style.display = 'none'; document.getElementById('2407.00320v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.20094">arXiv:2406.20094</a> <span> [<a href="https://arxiv.org/pdf/2406.20094">pdf</a>, <a href="https://arxiv.org/format/2406.20094">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Synthetic Data Creation with 1,000,000,000 Personas </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ge%2C+T">Tao Ge</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+X">Xin Chan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.20094v2-abstract-short" style="display: inline;"> We propose a novel persona-driven data synthesis methodology that leverages various perspectives within a large language model (LLM) to create diverse synthetic data. To fully exploit this methodology at scale, we introduce Persona Hub -- a collection of 1 billion diverse personas automatically curated from web data. 
These 1 billion personas (~13% of the world's total population), acting as distri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.20094v2-abstract-full').style.display = 'inline'; document.getElementById('2406.20094v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.20094v2-abstract-full" style="display: none;"> We propose a novel persona-driven data synthesis methodology that leverages various perspectives within a large language model (LLM) to create diverse synthetic data. To fully exploit this methodology at scale, we introduce Persona Hub -- a collection of 1 billion diverse personas automatically curated from web data. These 1 billion personas (~13% of the world's total population), acting as distributed carriers of world knowledge, can tap into almost every perspective encapsulated within the LLM, thereby facilitating the creation of diverse synthetic data at scale for various scenarios. By showcasing Persona Hub's use cases in synthesizing high-quality mathematical and logical reasoning problems, instructions (i.e., user prompts), knowledge-rich texts, game NPCs and tools (functions) at scale, we demonstrate persona-driven data synthesis is versatile, scalable, flexible, and easy to use, potentially driving a paradigm shift in synthetic data creation and applications in practice, which may have a profound impact on LLM research and development. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.20094v2-abstract-full').style.display = 'none'; document.getElementById('2406.20094v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11698">arXiv:2406.11698</a> <span> [<a href="https://arxiv.org/pdf/2406.11698">pdf</a>, <a href="https://arxiv.org/format/2406.11698">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Meta Reasoning for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+P">Peizhong Gao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+A">Ao Xie</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+S">Shaoguang Mao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wenshan Wu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yan Xia</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haipeng Mi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+F">Furu Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2406.11698v1-abstract-short" style="display: inline;"> We introduce Meta-Reasoning Prompting (MRP), a novel and efficient system prompting method for large language models (LLMs) inspired by human meta-reasoning. Traditional in-context learning-based reasoning techniques, such as Tree-of-Thoughts, show promise but lack consistent state-of-the-art performance across diverse tasks due to their specialized nature. MRP addresses this limitation by guiding… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11698v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11698v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11698v1-abstract-full" style="display: none;"> We introduce Meta-Reasoning Prompting (MRP), a novel and efficient system prompting method for large language models (LLMs) inspired by human meta-reasoning. Traditional in-context learning-based reasoning techniques, such as Tree-of-Thoughts, show promise but lack consistent state-of-the-art performance across diverse tasks due to their specialized nature. MRP addresses this limitation by guiding LLMs to dynamically select and apply different reasoning methods based on the specific requirements of each task, optimizing both performance and computational efficiency. With MRP, LLM reasoning operates in two phases. Initially, the LLM identifies the most appropriate reasoning method using task input cues and objective descriptions of available methods. Subsequently, it applies the chosen method to complete the task. This dynamic strategy mirrors human meta-reasoning, allowing the model to excel in a wide range of problem domains. We evaluate the effectiveness of MRP through comprehensive benchmarks. The results demonstrate that MRP achieves or approaches state-of-the-art performance across diverse tasks. 
MRP represents a significant advancement in enabling LLMs to identify cognitive challenges across problems and leverage benefits across different reasoning approaches, enhancing their ability to handle diverse and complex problem domains efficiently. Every LLM deserves a Meta-Reasoning Prompting to unlock its full potential and ensure adaptability in an ever-evolving landscape of challenges and applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11698v1-abstract-full').style.display = 'none'; document.getElementById('2406.11698v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06326">arXiv:2406.06326</a> <span> [<a href="https://arxiv.org/pdf/2406.06326">pdf</a>, <a href="https://arxiv.org/format/2406.06326">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Tuning: Instructing LLMs to Effectively Acquire New Knowledge through Self-Teaching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaoying Zhang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingyan Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yipeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Helen 
Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.06326v4-abstract-short" style="display: inline;"> Large language models (LLMs) often struggle to provide up-to-date information due to their one-time training and the constantly evolving nature of the world. To keep LLMs current, existing approaches typically involve continued pre-training on new documents. However, they frequently face difficulties in extracting stored knowledge. Motivated by the remarkable success of the Feynman Technique in ef… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06326v4-abstract-full').style.display = 'inline'; document.getElementById('2406.06326v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.06326v4-abstract-full" style="display: none;"> Large language models (LLMs) often struggle to provide up-to-date information due to their one-time training and the constantly evolving nature of the world. To keep LLMs current, existing approaches typically involve continued pre-training on new documents. However, they frequently face difficulties in extracting stored knowledge. Motivated by the remarkable success of the Feynman Technique in efficient human learning, we introduce Self-Tuning, a learning framework aimed at improving an LLM's ability to effectively acquire new knowledge from unseen raw documents through self-teaching. Specifically, we develop a Self-Teaching strategy that augments the documents with a set of knowledge-intensive tasks created in a self-supervised manner, focusing on three crucial aspects: memorization, comprehension, and self-reflection. 
Additionally, we introduce three Wiki-Newpages-2023-QA datasets to facilitate an in-depth analysis of an LLM's knowledge acquisition ability concerning memorization, extraction, and reasoning. Extensive experimental results on various models, e.g., Llama2-7B reveal that Self-Tuning consistently exhibits superior performance across all knowledge acquisition tasks and excels in preserving previous knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06326v4-abstract-full').style.display = 'none'; document.getElementById('2406.06326v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02866">arXiv:2406.02866</a> <span> [<a href="https://arxiv.org/pdf/2406.02866">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> A Design Experience for Interactive Narrative Based on The User Behavior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuan Yao</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haipeng Mi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2406.02866v1-abstract-short" style="display: inline;"> Research on interactive narrative experiences in physical spaces is becoming more popular, growing into an established new media art format with the development of technology and evolution of audience aesthetics. However, the methods of designing interactive narratives are still similar to the basic video narratology of traditional designers, directors, and producers. This paper provides a design… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02866v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02866v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02866v1-abstract-full" style="display: none;"> Research on interactive narrative experiences in physical spaces is becoming more popular, growing into an established new media art format with the development of technology and evolution of audience aesthetics. However, the methods of designing interactive narratives are still similar to the basic video narratology of traditional designers, directors, and producers. This paper provides a design method based on the user's physical behavior and proposes an art installation by this method, where the aim of the installation is to transmit a more vivid story to users, presenting a new research inspiration of interactive narratology for designers and researchers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02866v1-abstract-full').style.display = 'none'; document.getElementById('2406.02866v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">to appear at Cumulus Conference Proceedings Roma 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Cumulus Conference Proceedings Roma 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17837">arXiv:2405.17837</a> <span> [<a href="https://arxiv.org/pdf/2405.17837">pdf</a>, <a href="https://arxiv.org/format/2405.17837">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Enabling Generative Design Tools with LLM Agents for Mechanical Computation Devices: A Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+Q">Qiuyu Lu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+J">Jiawei Fang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Z">Zhihao Yao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yue Yang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+S">Shiqing Lyu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haipeng Mi</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+L">Lining Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17837v3-abstract-short" style="display: inline;"> In the field of Human-Computer Interaction (HCI), interactive devices with embedded mechanical computation are gaining attention. The rise of these cutting-edge devices has created a need for specialized design tools that democratize the prototyping process. 
While current tools streamline prototyping through parametric design and simulation, they often come with a steep learning curve and may not… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17837v3-abstract-full').style.display = 'inline'; document.getElementById('2405.17837v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17837v3-abstract-full" style="display: none;"> In the field of Human-Computer Interaction (HCI), interactive devices with embedded mechanical computation are gaining attention. The rise of these cutting-edge devices has created a need for specialized design tools that democratize the prototyping process. While current tools streamline prototyping through parametric design and simulation, they often come with a steep learning curve and may not fully support creative ideation. In this study, we use fluidic computation interfaces as a case study to explore how design tools for such devices can be augmented by Large Language Model agents (LLMs). Integrated with LLMs, the Generative Design Tool (GDT) better understands the capabilities and limitations of new technologies, proposes diverse and practical applications, and suggests designs that are technically and contextually appropriate. Additionally, it generates design parameters for visualizing results and producing fabrication-ready support files. This paper details the GDT's framework, implementation, and performance while addressing its potential and challenges. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17837v3-abstract-full').style.display = 'none'; document.getElementById('2405.17837v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.12253">arXiv:2404.12253</a> <span> [<a href="https://arxiv.org/pdf/2404.12253">pdf</a>, <a href="https://arxiv.org/format/2404.12253">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Toward Self-Improvement of LLMs via Imagination, Searching, and Criticizing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.12253v2-abstract-short" style="display: inline;"> Despite the impressive capabilities of Large Language Models (LLMs) on various tasks, they still struggle with scenarios that involve complex reasoning and planning. Recent work proposed advanced prompting techniques and the necessity of fine-tuning with high-quality data to augment LLMs' reasoning abilities. However, these approaches are inherently constrained by data availability and quality. I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12253v2-abstract-full').style.display = 'inline'; document.getElementById('2404.12253v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.12253v2-abstract-full" style="display: none;"> Despite the impressive capabilities of Large Language Models (LLMs) on various tasks, they still struggle with scenarios that involve complex reasoning and planning. Recent work proposed advanced prompting techniques and the necessity of fine-tuning with high-quality data to augment LLMs' reasoning abilities. However, these approaches are inherently constrained by data availability and quality. In light of this, self-correction and self-learning emerge as viable solutions, employing strategies that allow LLMs to refine their outputs and learn from self-assessed rewards. Yet, the efficacy of LLMs in self-refining their responses, particularly in complex reasoning and planning tasks, remains dubious. In this paper, we introduce AlphaLLM for the self-improvements of LLMs, which integrates Monte Carlo Tree Search (MCTS) with LLMs to establish a self-improving loop, thereby enhancing the capabilities of LLMs without additional annotations.
Drawing inspiration from the success of AlphaGo, AlphaLLM addresses the unique challenges of combining MCTS with LLM for self-improvement, including data scarcity, the vast search spaces of language tasks, and the subjective nature of feedback in language tasks. AlphaLLM is comprised of a prompt synthesis component, an efficient MCTS approach tailored for language tasks, and a trio of critic models for precise feedback. Our experimental results in mathematical reasoning tasks demonstrate that AlphaLLM significantly enhances the performance of LLMs without additional annotations, showing the potential for self-improvement in LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.12253v2-abstract-full').style.display = 'none'; document.getElementById('2404.12253v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09338">arXiv:2404.09338</a> <span> [<a href="https://arxiv.org/pdf/2404.09338">pdf</a>, <a href="https://arxiv.org/format/2404.09338">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Entropy Guided Extrapolative Decoding to Improve Factuality in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+S">Souvik Das</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09338v1-abstract-short" style="display: inline;"> Large language models (LLMs) exhibit impressive natural language capabilities but suffer from hallucination -- generating content ungrounded in the realities of training data. Recent work has focused on decoding techniques to improve factuality during inference by leveraging LLMs' hierarchical representation of factual knowledge, manipulating the predicted distributions at inference time. 
Current… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09338v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09338v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09338v1-abstract-full" style="display: none;"> Large language models (LLMs) exhibit impressive natural language capabilities but suffer from hallucination -- generating content ungrounded in the realities of training data. Recent work has focused on decoding techniques to improve factuality during inference by leveraging LLMs' hierarchical representation of factual knowledge, manipulating the predicted distributions at inference time. Current state-of-the-art approaches refine decoding by contrasting early-exit distributions from a lower layer with the final layer to exploit information related to factuality within the model forward procedure. However, such methods often assume the final layer is the most reliable and the lower layer selection process depends on it. In this work, we first propose extrapolation of critical token probabilities beyond the last layer for more accurate contrasting. We additionally employ layer-wise entropy-guided lower layer selection, decoupling the selection process from the final layer. Experiments demonstrate strong performance - surpassing state-of-the-art on multiple different datasets by large margins. Analyses show different kinds of prompts respond to different selection strategies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09338v1-abstract-full').style.display = 'none'; document.getElementById('2404.09338v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.09849">arXiv:2403.09849</a> <span> [<a href="https://arxiv.org/pdf/2403.09849">pdf</a>, <a href="https://arxiv.org/format/2403.09849">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Self-Consistency Boosts Calibration for Math Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Ante Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jinsong Su</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.09849v1-abstract-short" style="display: inline;"> Calibration, which establishes the correlation between accuracy and model confidence, is important for LLM development. We design three off-the-shelf calibration methods based on self-consistency (Wang et al., 2022) for math reasoning tasks. When evaluated on two popular benchmarks (GSM8K and MathQA) using strong open-source LLMs (Mistral and LLaMA2), our methods better bridge model confidence and acc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09849v1-abstract-full').style.display = 'inline'; document.getElementById('2403.09849v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.09849v1-abstract-full" style="display: none;"> Calibration, which establishes the correlation between accuracy and model confidence, is important for LLM development. We design three off-the-shelf calibration methods based on self-consistency (Wang et al., 2022) for math reasoning tasks. When evaluated on two popular benchmarks (GSM8K and MathQA) using strong open-source LLMs (Mistral and LLaMA2), our methods better bridge model confidence and accuracy than existing methods based on p(True) (Kadavath et al., 2022) or logit (Kadavath et al., 2022). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09849v1-abstract-full').style.display = 'none'; document.getElementById('2403.09849v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03496">arXiv:2403.03496</a> <span> [<a href="https://arxiv.org/pdf/2403.03496">pdf</a>, <a href="https://arxiv.org/ps/2403.03496">ps</a>, <a href="https://arxiv.org/format/2403.03496">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Knowledge Plug-and-Play Test Bed for Open-domain Dialogue Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangci Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+J">Jessica Ouyang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03496v1-abstract-short" style="display: inline;"> Knowledge-based, open-domain dialogue generation aims to build chit-chat systems that talk to humans using mined support knowledge. Many types and sources of knowledge have previously been shown to be useful as support knowledge. 
Even in the era of large language models, response generation grounded in knowledge retrieved from additional up-to-date sources remains a practically important approach.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03496v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03496v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03496v1-abstract-full" style="display: none;"> Knowledge-based, open-domain dialogue generation aims to build chit-chat systems that talk to humans using mined support knowledge. Many types and sources of knowledge have previously been shown to be useful as support knowledge. Even in the era of large language models, response generation grounded in knowledge retrieved from additional up-to-date sources remains a practically important approach. While prior work using single-source knowledge has shown a clear positive correlation between the performances of knowledge selection and response generation, there are no existing multi-source datasets for evaluating support knowledge retrieval. Further, prior work has assumed that the knowledge sources available at test time are the same as during training. This unrealistic assumption unnecessarily handicaps models, as new knowledge sources can become available after a model is trained. In this paper, we present a high-quality benchmark named multi-source Wizard of Wikipedia (Ms.WoW) for evaluating multi-source dialogue knowledge selection and response generation. Unlike existing datasets, it contains clean support knowledge, grounded at the utterance level and partitioned into multiple knowledge sources. We further propose a new challenge, dialogue knowledge plug-and-play, which aims to test an already trained dialogue model on using new support knowledge from previously unseen sources in a zero-shot fashion. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03496v1-abstract-full').style.display = 'none'; document.getElementById('2403.03496v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by LREC-COLING 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17982">arXiv:2402.17982</a> <span> [<a href="https://arxiv.org/pdf/2402.17982">pdf</a>, <a href="https://arxiv.org/format/2402.17982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Collaborative decoding of critical tokens for boosting factuality of large language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17982v1-abstract-short" style="display: inline;"> The most common training pipeline for large language models includes 
pretraining, finetuning and aligning phases, with their respective resulting models, such as the pretrained model and the finetuned model. Finetuned and aligned models show improved abilities of instruction following and safe generation; however, their abilities to stay factual about the world are impacted by the finetuning proces… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17982v1-abstract-full').style.display = 'inline'; document.getElementById('2402.17982v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17982v1-abstract-full" style="display: none;"> The most common training pipeline for large language models includes pretraining, finetuning and aligning phases, with their respective resulting models, such as the pretrained model and the finetuned model. Finetuned and aligned models show improved abilities of instruction following and safe generation; however, their abilities to stay factual about the world are impacted by the finetuning process. Furthermore, the common practice of using sampling during generation also increases chances of hallucination. In this work, we introduce a collaborative decoding framework to harness the high factuality within pretrained models through the concept of critical tokens. We first design a critical token classifier to decide which model to use for the next token, and subsequently generate the next token using different decoding strategies. Experiments with different models and datasets show that our decoding framework is able to reduce model hallucination significantly, showcasing the importance of the collaborative decoding framework.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17982v1-abstract-full').style.display = 'none'; document.getElementById('2402.17982v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15631">arXiv:2402.15631</a> <span> [<a href="https://arxiv.org/pdf/2402.15631">pdf</a>, <a href="https://arxiv.org/format/2402.15631">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Fine-Grained Self-Endorsement Improves Factuality and Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Ante Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jinsong Su</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15631v1-abstract-short" style="display: inline;"> This work studies improving large language model (LLM) generations at inference time by mitigating fact-conflicting hallucinations. Particularly, we propose a self-endorsement framework that leverages the fine-grained fact-level comparisons across multiple sampled responses. Compared with prior ensemble methods (Wang et al., 2022; Chen et al., 2023) that perform response-level selection, our appro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15631v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15631v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15631v1-abstract-full" style="display: none;"> This work studies improving large language model (LLM) generations at inference time by mitigating fact-conflicting hallucinations. Particularly, we propose a self-endorsement framework that leverages the fine-grained fact-level comparisons across multiple sampled responses. Compared with prior ensemble methods (Wang et al., 2022; Chen et al., 2023) that perform response-level selection, our approach can better alleviate hallucinations, especially for long-form generation tasks. Our approach can broadly benefit smaller and open-source LLMs as it mainly conducts simple content-based comparisons. Experiments on Biographies show that our method can effectively improve the factuality of generations with simple and intuitive prompts across different scales of LLMs. Besides, comprehensive analyses on TriviaQA and GSM8K demonstrate the potential of self-endorsement for broader application.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15631v1-abstract-full').style.display = 'none'; document.getElementById('2402.15631v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09267">arXiv:2402.09267</a> <span> [<a href="https://arxiv.org/pdf/2402.09267">pdf</a>, <a href="https://arxiv.org/format/2402.09267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Self-Alignment for Factuality: Mitigating Hallucinations in LLMs via Self-Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaoying Zhang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingyan Zhou</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09267v2-abstract-short" style="display: 
inline;"> Despite showing increasingly human-like abilities, large language models (LLMs) often struggle with factual inaccuracies, i.e. "hallucinations", even when they hold relevant knowledge. To address these hallucinations, current approaches typically necessitate high-quality human factuality annotations. In this work, we explore Self-Alignment for Factuality, where we leverage the self-evaluation capa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09267v2-abstract-full').style.display = 'inline'; document.getElementById('2402.09267v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09267v2-abstract-full" style="display: none;"> Despite showing increasingly human-like abilities, large language models (LLMs) often struggle with factual inaccuracies, i.e. "hallucinations", even when they hold relevant knowledge. To address these hallucinations, current approaches typically necessitate high-quality human factuality annotations. In this work, we explore Self-Alignment for Factuality, where we leverage the self-evaluation capability of an LLM to provide training signals that steer the model towards factuality. Specifically, we incorporate Self-Eval, a self-evaluation component, to prompt an LLM to validate the factuality of its own generated responses solely based on its internal knowledge. Additionally, we design Self-Knowledge Tuning (SK-Tuning) to augment the LLM's self-evaluation ability by improving the model's confidence estimation and calibration. We then utilize these self-annotated responses to fine-tune the model via Direct Preference Optimization algorithm. We show that the proposed self-alignment approach substantially enhances factual accuracy over Llama family models across three key knowledge-intensive tasks on TruthfulQA and BioGEN. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09267v2-abstract-full').style.display = 'none'; document.getElementById('2402.09267v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACL2024 Main </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09199">arXiv:2402.09199</a> <span> [<a href="https://arxiv.org/pdf/2402.09199">pdf</a>, <a href="https://arxiv.org/format/2402.09199">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24963/ijcai.2024/55">10.24963/ijcai.2024/55 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Ten Words Only Still Help: Improving Black-Box AI-Generated Text Detection via Proxy-Guided Efficient Re-Sampling </p> 
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yuhui Shi</a>, <a href="/search/cs?searchtype=author&query=Sheng%2C+Q">Qiang Sheng</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Juan Cao</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Hao Mi</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+B">Beizhe Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Danding Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09199v1-abstract-short" style="display: inline;"> With the rapidly increasing application of large language models (LLMs), their abuse has caused many undesirable societal problems such as fake news, academic dishonesty, and information pollution. This makes AI-generated text (AIGT) detection of great importance. Among existing methods, white-box methods are generally superior to black-box methods in terms of performance and generalizability, but… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09199v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09199v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09199v1-abstract-full" style="display: none;"> With the rapidly increasing application of large language models (LLMs), their abuse has caused many undesirable societal problems such as fake news, academic dishonesty, and information pollution. This makes AI-generated text (AIGT) detection of great importance. Among existing methods, white-box methods are generally superior to black-box methods in terms of performance and generalizability, but they require access to LLMs' internal states and are not applicable to black-box settings. 
In this paper, we propose to estimate word generation probabilities as pseudo white-box features via multiple re-sampling to help improve AIGT detection under the black-box setting. Specifically, we design POGER, a proxy-guided efficient re-sampling method, which selects a small subset of representative words (e.g., 10 words) for performing multiple re-sampling in black-box AIGT detection. Experiments on datasets containing texts from humans and seven LLMs show that POGER outperforms all baselines in macro F1 under black-box, partial white-box, and out-of-distribution settings and maintains lower re-sampling costs than its existing counterparts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09199v1-abstract-full').style.display = 'none'; document.getElementById('2402.09199v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 6 figures, 7 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IJCAI 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.10353">arXiv:2401.10353</a> <span> [<a href="https://arxiv.org/pdf/2401.10353">pdf</a>, <a href="https://arxiv.org/format/2401.10353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Inconsistent dialogue responses and how to recover from them </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mian Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.10353v1-abstract-short" style="display: inline;"> One critical issue for chat systems is to stay consistent about preferences, opinions, beliefs and facts of itself, which has been shown a difficult problem. In this work, we study methods to assess and bolster utterance consistency of chat systems. 
A dataset is first developed for studying the inconsistencies, where inconsistent dialogue responses, explanations of the inconsistencies, and recover… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10353v1-abstract-full').style.display = 'inline'; document.getElementById('2401.10353v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.10353v1-abstract-full" style="display: none;"> One critical issue for chat systems is to stay consistent about preferences, opinions, beliefs and facts of itself, which has been shown a difficult problem. In this work, we study methods to assess and bolster utterance consistency of chat systems. A dataset is first developed for studying the inconsistencies, where inconsistent dialogue responses, explanations of the inconsistencies, and recovery utterances are authored by annotators. This covers the life span of inconsistencies, namely introduction, understanding, and resolution. Building on this, we introduce a set of tasks centered on dialogue consistency, specifically focused on its detection and resolution. Our experimental findings indicate that our dataset significantly helps the progress in identifying and resolving conversational inconsistencies, and current popular large language models like ChatGPT which are good at resolving inconsistencies however still struggle with detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10353v1-abstract-full').style.display = 'none'; document.getElementById('2401.10353v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in EACL 2024. Code and dataset available at https://github.com/mianzhang/CIDER</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.16155">arXiv:2309.16155</a> <span> [<a href="https://arxiv.org/pdf/2309.16155">pdf</a>, <a href="https://arxiv.org/format/2309.16155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Trickle-down Impact of Reward (In-)consistency on RLHF </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+L">Lingfeng Shen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sihao Chen</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Khashabi%2C+D">Daniel Khashabi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.16155v1-abstract-short" style="display: inline;"> Standard practice within Reinforcement Learning from Human Feedback (RLHF) involves optimizing against a Reward Model (RM), which itself is trained to reflect human preferences for desirable generations. 
A notable subject that is understudied is the (in-)consistency of RMs -- whether they can recognize the semantic changes to different prompts and appropriately adapt their reward assignments -- an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16155v1-abstract-full').style.display = 'inline'; document.getElementById('2309.16155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.16155v1-abstract-full" style="display: none;"> Standard practice within Reinforcement Learning from Human Feedback (RLHF) involves optimizing against a Reward Model (RM), which itself is trained to reflect human preferences for desirable generations. A notable subject that is understudied is the (in-)consistency of RMs -- whether they can recognize the semantic changes to different prompts and appropriately adapt their reward assignments -- and their impact on the downstream RLHF model. In this paper, we visit a series of research questions relevant to RM inconsistency: (1) How can we measure the consistency of reward models? (2) How consistent are the existing RMs and how can we improve them? (3) In what ways does reward inconsistency influence the chatbots resulting from the RLHF model training? We propose Contrast Instructions -- a benchmarking strategy for the consistency of RM. Each example in Contrast Instructions features a pair of lexically similar instructions with different ground truth responses. A consistent RM is expected to rank the corresponding instruction and response higher than other combinations. We observe that current RMs trained with the standard ranking objective fail miserably on Contrast Instructions compared to average humans. 
To show that RM consistency can be improved efficiently without using extra training budget, we propose two techniques ConvexDA and RewardFusion, which enhance reward consistency through extrapolation during the RM training and inference stage, respectively. We show that RLHF models trained with a more consistent RM yield more useful responses, suggesting that reward inconsistency exhibits a trickle-down effect on the downstream RLHF process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16155v1-abstract-full').style.display = 'none'; document.getElementById('2309.16155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.10202">arXiv:2309.10202</a> <span> [<a href="https://arxiv.org/pdf/2309.10202">pdf</a>, <a href="https://arxiv.org/format/2309.10202">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Stabilizing RLHF through Advantage Model and Selective Rehearsal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+B">Baolin Peng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a 
href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.10202v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have revolutionized natural language processing, yet aligning these models with human values and preferences using RLHF remains a significant challenge. This challenge is characterized by various instabilities, such as reward hacking and catastrophic forgetting. In this technical report, we propose two innovations to stabilize RLHF training: 1) Advantage Model, which d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10202v1-abstract-full').style.display = 'inline'; document.getElementById('2309.10202v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.10202v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have revolutionized natural language processing, yet aligning these models with human values and preferences using RLHF remains a significant challenge. This challenge is characterized by various instabilities, such as reward hacking and catastrophic forgetting. In this technical report, we propose two innovations to stabilize RLHF training: 1) Advantage Model, which directly models advantage score i.e., extra reward compared to the expected rewards and regulates score distributions across tasks to prevent reward hacking. 2) Selective Rehearsal, which mitigates catastrophic forgetting by strategically selecting data for PPO training and knowledge rehearsing. Our experimental analysis on public and proprietary datasets reveals that the proposed methods not only increase stability in RLHF training but also achieve higher reward scores and win rates. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10202v1-abstract-full').style.display = 'none'; document.getElementById('2309.10202v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.00865">arXiv:2303.00865</a> <span> [<a href="https://arxiv.org/pdf/2303.00865">pdf</a>, <a href="https://arxiv.org/format/2303.00865">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AMIGO: Sparse Multi-Modal Graph Transformer with Shared-Context Processing for Representation Learning of Giga-pixel Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nakhli%2C+R">Ramin Nakhli</a>, <a href="/search/cs?searchtype=author&query=Moghadam%2C+P+A">Puria Azadi Moghadam</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haoyang Mi</a>, <a href="/search/cs?searchtype=author&query=Farahani%2C+H">Hossein Farahani</a>, <a href="/search/cs?searchtype=author&query=Baras%2C+A">Alexander Baras</a>, <a href="/search/cs?searchtype=author&query=Gilks%2C+B">Blake Gilks</a>, <a href="/search/cs?searchtype=author&query=Bashashati%2C+A">Ali Bashashati</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2303.00865v2-abstract-short" style="display: inline;"> Processing giga-pixel whole slide histopathology images (WSI) is a computationally expensive task. Multiple instance learning (MIL) has become the conventional approach to process WSIs, in which these images are split into smaller patches for further processing. However, MIL-based techniques ignore explicit information about the individual cells within a patch. In this paper, by defining the novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00865v2-abstract-full').style.display = 'inline'; document.getElementById('2303.00865v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.00865v2-abstract-full" style="display: none;"> Processing giga-pixel whole slide histopathology images (WSI) is a computationally expensive task. Multiple instance learning (MIL) has become the conventional approach to process WSIs, in which these images are split into smaller patches for further processing. However, MIL-based techniques ignore explicit information about the individual cells within a patch. In this paper, by defining the novel concept of shared-context processing, we designed a multi-modal Graph Transformer (AMIGO) that uses the cellular graph within the tissue to provide a single representation for a patient while taking advantage of the hierarchical structure of the tissue, enabling a dynamic focus between cell-level and tissue-level information. We benchmarked the performance of our model against multiple state-of-the-art methods in survival prediction and showed that ours can significantly outperform all of them including hierarchical Vision Transformer (ViT). More importantly, we show that our model is strongly robust to missing information to an extent that it can achieve the same performance with as low as 20% of the data. 
Finally, in two different cancer datasets, we demonstrated that our model was able to stratify the patients into low-risk and high-risk groups while other state-of-the-art methods failed to achieve this goal. We also publish a large dataset of immunohistochemistry images (InUIT) containing 1,600 tissue microarray (TMA) cores from 188 patients along with their survival information, making it one of the largest publicly available datasets in this context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00865v2-abstract-full').style.display = 'none'; document.getElementById('2303.00865v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CVPR 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.09300">arXiv:2302.09300</a> <span> [<a href="https://arxiv.org/pdf/2302.09300">pdf</a>, <a href="https://arxiv.org/format/2302.09300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.artint.2023.103874">10.1016/j.artint.2023.103874 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Search-Engine-augmented Dialogue Response Generation with Cheaply Supervised Query Production </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+A">Ante Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qi Liu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Longyue Wang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+Z">Zhaopeng Tu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jinsong Su</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2302.09300v1-abstract-short" style="display: inline;"> Knowledge-aided dialogue response generation aims at augmenting chatbots with relevant external knowledge in the hope of generating more informative responses. The majority of previous work assumes that the relevant knowledge is given as input or retrieved from a static pool of knowledge. However, this assumption violates the real-world situation, where knowledge is continually updated and a chatb… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09300v1-abstract-full').style.display = 'inline'; document.getElementById('2302.09300v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.09300v1-abstract-full" style="display: none;"> Knowledge-aided dialogue response generation aims at augmenting chatbots with relevant external knowledge in the hope of generating more informative responses. The majority of previous work assumes that the relevant knowledge is given as input or retrieved from a static pool of knowledge. However, this assumption violates the real-world situation, where knowledge is continually updated and a chatbot has to dynamically retrieve useful knowledge. We propose a dialogue model that can access the vast and dynamic information from any search engine for response generation. As the core module, a query producer is used to generate queries from a dialogue context to interact with a search engine. We design a training algorithm using cheap noisy supervision for the query producer, where the signals are obtained by comparing retrieved articles with the next dialogue response. As the result, the query producer is adjusted without any human annotation of gold queries, making it easily transferable to other domains and search engines. 
Experiments show that our query producer can achieve R@1 and R@5 rates of 62.4% and 74.8% for retrieving gold knowledge, and the overall model generates better responses over strong knowledge-aided baselines using BART and other typical systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09300v1-abstract-full').style.display = 'none'; document.getElementById('2302.09300v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Artificial Intelligence 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.13683">arXiv:2301.13683</a> <span> [<a href="https://arxiv.org/pdf/2301.13683">pdf</a>, <a href="https://arxiv.org/format/2301.13683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Friend-training: Learning from Models of Different but Related Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mian Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiabing Zhou</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.13683v1-abstract-short" style="display: inline;"> Current self-training methods such as standard self-training, co-training, tri-training, and others often focus on improving model performance on a single task, utilizing differences in input features, model architectures, and training processes. However, many tasks in natural language processing are about different but related aspects of language, and models trained for one task can be great teac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13683v1-abstract-full').style.display = 'inline'; document.getElementById('2301.13683v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.13683v1-abstract-full" style="display: none;"> Current self-training methods such as standard self-training, co-training, tri-training, and others often focus on improving model performance on a single task, utilizing differences in input features, model architectures, and training processes. However, many tasks in natural language processing are about different but related aspects of language, and models trained for one task can be great teachers for other related tasks. In this work, we propose friend-training, a cross-task self-training framework, where models trained to do different tasks are used in an iterative training, pseudo-labeling, and retraining process to help each other for better selection of pseudo-labels. With two dialogue understanding tasks, conversational semantic role labeling and dialogue rewriting, chosen for a case study, we show that the models trained with the friend-training framework achieve the best performance compared to strong baselines. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13683v1-abstract-full').style.display = 'none'; document.getElementById('2301.13683v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EACL2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04476">arXiv:2211.04476</a> <span> [<a href="https://arxiv.org/pdf/2211.04476">pdf</a>, <a href="https://arxiv.org/format/2211.04476">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Discover, Explanation, Improvement: An Automatic Slice Detection Framework for Natural Language Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04476v2-abstract-short" style="display: inline;"> Pretrained natural language processing (NLP) models have achieved high overall performance, but they still make systematic errors. Instead of manual error analysis, research on slice detection models (SDM), which automatically identify underperforming groups of datapoints, has caught escalated attention in Computer Vision for both understanding model behaviors and providing insights for future mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04476v2-abstract-full').style.display = 'inline'; document.getElementById('2211.04476v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04476v2-abstract-full" style="display: none;"> Pretrained natural language processing (NLP) models have achieved high overall performance, but they still make systematic errors. Instead of manual error analysis, research on slice detection models (SDM), which automatically identify underperforming groups of datapoints, has caught escalated attention in Computer Vision for both understanding model behaviors and providing insights for future model training and designing. However, little research on SDM and quantitative evaluation of their effectiveness have been conducted on NLP tasks. Our paper fills the gap by proposing a benchmark named "Discover, Explain, Improve (DEIM)" for classification NLP tasks along with a new SDM Edisa. Edisa discovers coherent and underperforming groups of datapoints; DEIM then unites them under human-understandable concepts and provides comprehensive evaluation tasks and corresponding quantitative metrics. The evaluation in DEIM shows that Edisa can accurately select error-prone datapoints with informative semantic features that summarize error patterns. 
Detecting difficult datapoints directly boosts model performance without tuning any original model parameters, showing that discovered slices are actionable for users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04476v2-abstract-full').style.display = 'none'; document.getElementById('2211.04476v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures, accepted by Transactions of the Association for Computational Linguistics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12309">arXiv:2210.12309</a> <span> [<a href="https://arxiv.org/pdf/2210.12309">pdf</a>, <a href="https://arxiv.org/format/2210.12309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Learning a Grammar Inducer from Massive Uncurated Instructional Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Songyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a 
href="/search/cs?searchtype=author&query=Jin%2C+L">Lifeng Jin</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kun Xu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dong Yu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+J">Jiebo Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12309v1-abstract-short" style="display: inline;"> Video-aided grammar induction aims to leverage video information for finding more accurate syntactic grammars for accompanying text. While previous work focuses on building systems for inducing grammars on text that are well-aligned with video content, we investigate the scenario, in which text and video are only in loose correspondence. Such data can be found in abundance online, and the weak cor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12309v1-abstract-full').style.display = 'inline'; document.getElementById('2210.12309v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12309v1-abstract-full" style="display: none;"> Video-aided grammar induction aims to leverage video information for finding more accurate syntactic grammars for accompanying text. While previous work focuses on building systems for inducing grammars on text that are well-aligned with video content, we investigate the scenario, in which text and video are only in loose correspondence. Such data can be found in abundance online, and the weak correspondence is similar to the indeterminacy problem studied in language acquisition. Furthermore, we build a new model that can better learn video-span correlation without manually designed features adopted by previous work. 
Experiments show that our model trained only on large-scale YouTube data with no text-video alignment reports strong and robust performances across three unseen datasets, despite domain shift and noisy label issues. Furthermore our model yields higher F1 scores than the previous state-of-the-art systems trained on in-domain data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12309v1-abstract-full').style.display = 'none'; document.getElementById('2210.12309v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.04045">arXiv:2203.04045</a> <span> [<a href="https://arxiv.org/pdf/2203.04045">pdf</a>, <a href="https://arxiv.org/format/2203.04045">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Towards Generalized Models for Task-oriented Dialogue Modeling on Spoken Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+R">Ruijie Yan</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+S">Shuang Peng</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Liang Jiang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shihui Yang</a>, <a 
href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuchi Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiajun Li</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+L">Liangrui Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yongliang Wang</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zujie Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.04045v1-abstract-short" style="display: inline;"> Building robust and general dialogue models for spoken conversations is challenging due to the gap in distributions of spoken and written data. This paper presents our approach to build generalized models for the Knowledge-grounded Task-oriented Dialogue Modeling on Spoken Conversations Challenge of DSTC-10. In order to mitigate the discrepancies between spoken and written text, we mainly employ e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.04045v1-abstract-full').style.display = 'inline'; document.getElementById('2203.04045v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.04045v1-abstract-full" style="display: none;"> Building robust and general dialogue models for spoken conversations is challenging due to the gap in distributions of spoken and written data. This paper presents our approach to build generalized models for the Knowledge-grounded Task-oriented Dialogue Modeling on Spoken Conversations Challenge of DSTC-10. In order to mitigate the discrepancies between spoken and written text, we mainly employ extensive data augmentation strategies on written data, including artificial error injection and round-trip text-speech transformation. 
To train robust models for spoken conversations, we improve pre-trained language models, and apply ensemble algorithms for each sub-task. Typically, for the detection task, we fine-tune RoBERTa and ELECTRA, and run an error-fixing ensemble algorithm. For the selection task, we adopt a two-stage framework that consists of entity tracking and knowledge ranking, and propose a multi-task learning method to learn multi-level semantic information by domain classification and entity selection. For the generation task, we adopt a cross-validation data process to improve pre-trained generative language models, followed by a consensus decoding algorithm, which can add arbitrary features like relative ROUGE metric, and tune associated feature weights toward BLEU directly. Our approach ranks third on the objective evaluation and second on the final official human evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.04045v1-abstract-full').style.display = 'none'; document.getElementById('2203.04045v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.00281">arXiv:2203.00281</a> <span> [<a href="https://arxiv.org/pdf/2203.00281">pdf</a>, <a href="https://arxiv.org/format/2203.00281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fast-R2D2: A Pretrained Recursive Neural Network based on Pruned CKY for Grammar Induction and Text Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiang Hu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Liang Li</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+G">Gerard de Melo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.00281v3-abstract-short" style="display: inline;"> Recently CKY-based models show great potential in unsupervised grammar induction thanks to their human-like encoding paradigm, which runs recursively and hierarchically, but requires $O(n^3)$ time-complexity. 
Recursive Transformer based on Differentiable Trees (R2D2) makes it possible to scale to large language model pre-training even with complex tree encoder by introducing a heuristic pruning me… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.00281v3-abstract-full').style.display = 'inline'; document.getElementById('2203.00281v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.00281v3-abstract-full" style="display: none;"> Recently CKY-based models show great potential in unsupervised grammar induction thanks to their human-like encoding paradigm, which runs recursively and hierarchically, but requires $O(n^3)$ time-complexity. Recursive Transformer based on Differentiable Trees (R2D2) makes it possible to scale to large language model pre-training even with complex tree encoder by introducing a heuristic pruning method. However, the rule-based pruning approach suffers from local optimum and slow inference issues. In this paper, we fix those issues in a unified method. We propose to use a top-down parser as a model-based pruning method, which also enables parallel encoding during inference. Typically, our parser casts parsing as a split point scoring task, which first scores all split points for a given sentence, and then recursively splits a span into two by picking a split point with the highest score in the current span. The reverse order of the splits is considered as the order of pruning in R2D2 encoder. Beside the bi-directional language model loss, we also optimize the parser by minimizing the KL distance between tree probabilities from parser and R2D2. Our experiments show that our Fast-R2D2 improves performance significantly in grammar induction and achieves competitive results in downstream classification tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.00281v3-abstract-full').style.display = 'none'; document.getElementById('2203.00281v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.14430">arXiv:2112.14430</a> <span> [<a href="https://arxiv.org/pdf/2112.14430">pdf</a>, <a href="https://arxiv.org/format/2112.14430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> DP-FP: Differentially Private Forward Propagation for Large Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+J">Jian Du</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.14430v1-abstract-short" style="display: inline;"> When applied to large-scale learning problems, the conventional wisdom on privacy-preserving deep learning, known as Differential Private Stochastic Gradient Descent (DP-SGD), has met with limited 
success due to significant performance degradation and high memory overhead when compared to the non-privacy counterpart. We show how to mitigate the performance drop by replacing the DP-SGD with a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14430v1-abstract-full').style.display = 'inline'; document.getElementById('2112.14430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.14430v1-abstract-full" style="display: none;"> When applied to large-scale learning problems, the conventional wisdom on privacy-preserving deep learning, known as Differential Private Stochastic Gradient Descent (DP-SGD), has met with limited success due to significant performance degradation and high memory overhead when compared to the non-privacy counterpart. We show how to mitigate the performance drop by replacing the DP-SGD with a novel DP Forward-Propagation (DP-FP) followed by an off-the-shelf non-DP optimizer. Our DP-FP employs novel (1) representation clipping followed by noise addition in the forward propagation stage, as well as (2) micro-batch construction via subsampling to achieve DP amplification and reduce noise power to $1/M$, where $M$ is the number of micro-batch in a step. When training a classification model, our DP-FP with all of the privacy-preserving operations on the representation is innately free of gradient bias, total noise proportionally to model size, and memory issues in DP-SGD. As a result, our DP-FP outperforms cutting-edge DP-SGD while retaining the same level of privacy, and it approaches non-private baselines and significantly outperforms state-of-the-art DP-SGD variants. 
When applied to RoBERTa-large on four downstream tasks, for example, DP-FP achieves an average accuracy of 91.34% with privacy budgets less than 3, representing a 3.81% performance improvement over the state-of-the-art DP-SGD and only a 0.9% loss compared to the non-private baseline but with a significantly lower privacy leakage risk. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14430v1-abstract-full').style.display = 'none'; document.getElementById('2112.14430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.05866">arXiv:2107.05866</a> <span> [<a href="https://arxiv.org/pdf/2107.05866">pdf</a>, <a href="https://arxiv.org/format/2107.05866">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> A Dialogue-based Information Extraction System for Medical Insurance Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+S">Shuang Peng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mengdi Zhou</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Minghui Yang</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, 
<a href="/search/cs?searchtype=author&query=Cao%2C+S">Shaosheng Cao</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zujie Wen</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+T">Teng Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hongbin Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Lei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.05866v1-abstract-short" style="display: inline;"> In the Chinese medical insurance industry, the assessor's role is essential and requires significant efforts to converse with the claimant. This is a highly professional job that involves many parts, such as identifying personal information, collecting related evidence, and making a final insurance report. Due to the coronavirus (COVID-19) pandemic, the previous offline insurance assessment has to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.05866v1-abstract-full').style.display = 'inline'; document.getElementById('2107.05866v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.05866v1-abstract-full" style="display: none;"> In the Chinese medical insurance industry, the assessor's role is essential and requires significant efforts to converse with the claimant. This is a highly professional job that involves many parts, such as identifying personal information, collecting related evidence, and making a final insurance report. Due to the coronavirus (COVID-19) pandemic, the previous offline insurance assessment has to be conducted online. 
However, for the junior assessor often lacking practical experience, it is not easy to quickly handle such a complex online procedure, yet this is important as the insurance company needs to decide how much compensation the claimant should receive based on the assessor's feedback. In order to promote assessors' work efficiency and speed up the overall procedure, in this paper, we propose a dialogue-based information extraction system that integrates advanced NLP technologies for medical insurance assessment. With the assistance of our system, the average time cost of the procedure is reduced from 55 minutes to 35 minutes, and the total human resources cost is saved 30% compared with the previous offline procedure. Until now, the system has already served thousands of online claim cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.05866v1-abstract-full').style.display = 'none'; document.getElementById('2107.05866v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in the Findings of ACL 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.00967">arXiv:2107.00967</a> <span> [<a href="https://arxiv.org/pdf/2107.00967">pdf</a>, <a href="https://arxiv.org/format/2107.00967">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2021.acl-long.379">10.18653/v1/2021.acl-long.379 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> R2D2: Recursive Transformer based on Differentiable Tree for Interpretable Hierarchical Language Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiang Hu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zujie Wen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yafang Wang</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yi Su</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jing Zheng</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+G">Gerard de Melo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.00967v2-abstract-short" 
style="display: inline;"> Human language understanding operates at multiple levels of granularity (e.g., words, phrases, and sentences) with increasing levels of abstraction that can be hierarchically combined. However, existing deep models with stacked layers do not explicitly model any sort of hierarchical process. This paper proposes a recursive Transformer model based on differentiable CKY style binary trees to emulate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.00967v2-abstract-full').style.display = 'inline'; document.getElementById('2107.00967v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.00967v2-abstract-full" style="display: none;"> Human language understanding operates at multiple levels of granularity (e.g., words, phrases, and sentences) with increasing levels of abstraction that can be hierarchically combined. However, existing deep models with stacked layers do not explicitly model any sort of hierarchical process. This paper proposes a recursive Transformer model based on differentiable CKY style binary trees to emulate the composition process. We extend the bidirectional language model pre-training objective to this architecture, attempting to predict each word given its left and right abstraction nodes. To scale up our approach, we also introduce an efficient pruned tree induction algorithm to enable encoding in just a linear number of composition steps. Experimental results on language modeling and unsupervised parsing show the effectiveness of our approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.00967v2-abstract-full').style.display = 'none'; document.getElementById('2107.00967v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL-IJCNLP 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.01511">arXiv:2105.01511</a> <span> [<a href="https://arxiv.org/pdf/2105.01511">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Radio Communication Scenarios in 5G-Railways </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+R">Ruisi He</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+Z">Zhangdui Zhong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Mi Yang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chen Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+R">Ruifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+J">Jianwen Ding</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Hang Mi</a>, <a 
href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhangfeng Ma</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guiqi Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Changzhu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.01511v1-abstract-short" style="display: inline;"> With the rapid development of railways, especially high-speed railways, there is an increasingly urgent demand for new wireless communication system for railways. Taking the mature 5G technology as an opportunity, 5G-railways (5G-R) have been widely regarded as a solution to meet the diversified demands of railway wireless communications. For the design, deployment and improvement of 5G-R networks… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.01511v1-abstract-full').style.display = 'inline'; document.getElementById('2105.01511v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.01511v1-abstract-full" style="display: none;"> With the rapid development of railways, especially high-speed railways, there is an increasingly urgent demand for new wireless communication system for railways. Taking the mature 5G technology as an opportunity, 5G-railways (5G-R) have been widely regarded as a solution to meet the diversified demands of railway wireless communications. For the design, deployment and improvement of 5G-R networks, radio communication scenario classification plays an important role, affecting channel modeling and system performance evaluation. In this paper, a standardized radio communication scenario classification, including 18 scenarios, is proposed for 5G-R. 
This paper analyzes the differences of 5G-R scenarios compared with the traditional cellular networks and GSM-railways, according to 5G-R requirements and the unique physical environment and propagation characteristics. The proposed standardized scenario classification helps deepen the research of 5G-R and promote the development and application of the existing advanced technologies in railways. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.01511v1-abstract-full').style.display = 'none'; document.getElementById('2105.01511v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.11296">arXiv:2101.11296</a> <span> [<a href="https://arxiv.org/pdf/2101.11296">pdf</a>, <a href="https://arxiv.org/format/2101.11296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedH2L: Federated Learning with Model and Statistical Heterogeneity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiying Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wei Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Huaimin Wang</a>, <a 
href="/search/cs?searchtype=author&query=Mi%2C+H">Haibo Mi</a>, <a href="/search/cs?searchtype=author&query=Hospedales%2C+T+M">Timothy M. Hospedales</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.11296v3-abstract-short" style="display: inline;"> Federated learning (FL) enables distributed participants to collectively learn a strong global model without sacrificing their individual data privacy. Mainstream FL approaches require each participant to share a common network architecture and further assume that data are sampled IID across participants. However, in real-world deployments participants may require heterogeneous network archite… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.11296v3-abstract-full').style.display = 'inline'; document.getElementById('2101.11296v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.11296v3-abstract-full" style="display: none;"> Federated learning (FL) enables distributed participants to collectively learn a strong global model without sacrificing their individual data privacy. Mainstream FL approaches require each participant to share a common network architecture and further assume that data are sampled IID across participants. However, in real-world deployments participants may require heterogeneous network architectures; and the data distribution is almost certainly non-uniform across participants. To address these issues we introduce FedH2L, which is agnostic to both the model architecture and robust to different data distributions across participants. In contrast to approaches sharing parameters or gradients, FedH2L relies on mutual distillation, exchanging only posteriors on a shared seed set between participants in a decentralized manner. 
This makes it extremely bandwidth efficient, model agnostic, and crucially produces models capable of performing well on the whole data distribution when learning from heterogeneous silos. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.11296v3-abstract-full').style.display = 'none'; document.getElementById('2101.11296v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.12832">arXiv:1810.12832</a> <span> [<a href="https://arxiv.org/pdf/1810.12832">pdf</a>, <a href="https://arxiv.org/format/1810.12832">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1121/1.5111059">10.1121/1.5111059 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> General audio tagging with ensembling convolutional neural network and statistical features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kele Xu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Boqing Zhu</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+Q">Qiuqiang Kong</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haibo Mi</a>, 
<a href="/search/cs?searchtype=author&query=Ding%2C+B">Bo Ding</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dezhi Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Huaimin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.12832v1-abstract-short" style="display: inline;"> Audio tagging aims to infer descriptive labels from audio clips. Audio tagging is challenging due to the limited size of data and noisy labels. In this paper, we describe our solution for the DCASE 2018 Task 2 general audio tagging challenge. The contributions of our solution include: We investigated a variety of convolutional neural network architectures to solve the audio tagging task. Statistic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.12832v1-abstract-full').style.display = 'inline'; document.getElementById('1810.12832v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.12832v1-abstract-full" style="display: none;"> Audio tagging aims to infer descriptive labels from audio clips. Audio tagging is challenging due to the limited size of data and noisy labels. In this paper, we describe our solution for the DCASE 2018 Task 2 general audio tagging challenge. The contributions of our solution include: We investigated a variety of convolutional neural network architectures to solve the audio tagging task. Statistical features are applied to capture statistical patterns of audio features to improve the classification performance. Ensemble learning is applied to ensemble the outputs from the deep classifiers to utilize complementary information. A sample re-weight strategy is employed for ensemble training to address the noisy label problem.
Our system achieves a mean average precision (mAP@3) of 0.958, outperforming the baseline system of 0.704. Our system ranked the 1st and 4th out of 558 submissions in the public and private leaderboard of DCASE 2018 Task 2 challenge. Our codes are available at https://github.com/Cocoxili/DCASE2018Task2/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.12832v1-abstract-full').style.display = 'none'; document.getElementById('1810.12832v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.06877">arXiv:1810.06877</a> <span> [<a href="https://arxiv.org/pdf/1810.06877">pdf</a>, <a href="https://arxiv.org/format/1810.06877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Collaborative Deep Learning Across Multiple Data Centers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kele Xu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haibo Mi</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+D">Dawei Feng</a>, <a 
href="/search/cs?searchtype=author&query=Wang%2C+H">Huaimin Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chuan Chen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zibin Zheng</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+X">Xu Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.06877v1-abstract-short" style="display: inline;"> Valuable training data is often owned by independent organizations and located in multiple data centers. Most deep learning approaches require to centralize the multi-datacenter data for performance purpose. In practice, however, it is often infeasible to transfer all data to a centralized data center due to not only bandwidth limitation but also the constraints of privacy regulations. Model avera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.06877v1-abstract-full').style.display = 'inline'; document.getElementById('1810.06877v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.06877v1-abstract-full" style="display: none;"> Valuable training data is often owned by independent organizations and located in multiple data centers. Most deep learning approaches require to centralize the multi-datacenter data for performance purpose. In practice, however, it is often infeasible to transfer all data to a centralized data center due to not only bandwidth limitation but also the constraints of privacy regulations. Model averaging is a conventional choice for data parallelized training, but its ineffectiveness is claimed by previous studies as deep neural networks are often non-convex. 
In this paper, we argue that model averaging can be effective in the decentralized environment by using two strategies, namely, the cyclical learning rate and the increased number of epochs for local model training. With the two strategies, we show that model averaging can provide competitive performance in the decentralized mode compared to the data-centralized one. In a practical environment with multiple data centers, we conduct extensive experiments using state-of-the-art deep network architectures on different types of data. Results demonstrate the effectiveness and robustness of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.06877v1-abstract-full').style.display = 'none'; document.getElementById('1810.06877v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to AAAI 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.04422">arXiv:1806.04422</a> <span> [<a href="https://arxiv.org/pdf/1806.04422">pdf</a>, <a href="https://arxiv.org/format/1806.04422">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Sample Dropout for Audio Scene Classification Using Multi-Scale Dense Connected Convolutional Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+D">Dawei Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kele Xu</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haibo Mi</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+F">Feifan Liao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.04422v1-abstract-short" style="display: inline;"> Acoustic scene classification is an intricate problem for a machine. As an emerging field of research, deep Convolutional Neural Networks (CNN) achieve convincing results. 
In this paper, we explore the use of multi-scale Dense connected convolutional neural network (DenseNet) for the classification task, with the goal to improve the classification performance as multi-scale features can be extract… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04422v1-abstract-full').style.display = 'inline'; document.getElementById('1806.04422v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.04422v1-abstract-full" style="display: none;"> Acoustic scene classification is an intricate problem for a machine. As an emerging field of research, deep Convolutional Neural Networks (CNN) achieve convincing results. In this paper, we explore the use of multi-scale Dense connected convolutional neural network (DenseNet) for the classification task, with the goal to improve the classification performance as multi-scale features can be extracted from the time-frequency representation of the audio signal. On the other hand, most of previous CNN-based audio scene classification approaches aim to improve the classification accuracy, by employing different regularization techniques, such as the dropout of hidden units and data augmentation, to reduce overfitting. It is widely known that outliers in the training set have a high negative influence on the trained model, and culling the outliers may improve the classification performance, while it is often under-explored in previous studies. In this paper, inspired by the silence removal in the speech signal processing, a novel sample dropout approach is proposed, which aims to remove outliers in the training dataset. 
Using the DCASE 2017 audio scene classification datasets, the experimental results demonstrate that the proposed multi-scale DenseNet provides superior performance to the traditional single-scale DenseNet, while the sample dropout method can further improve the classification robustness of multi-scale DenseNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04422v1-abstract-full').style.display = 'none'; document.getElementById('1806.04422v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to 2018 Pacific Rim Knowledge Acquisition Workshop (PKAW)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.07319">arXiv:1805.07319</a> <span> [<a href="https://arxiv.org/pdf/1805.07319">pdf</a>, <a href="https://arxiv.org/format/1805.07319">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mixup-Based Acoustic Scene Classification Using Multi-Channel Convolutional Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kele Xu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+D">Dawei Feng</a>, <a href="/search/cs?searchtype=author&query=Mi%2C+H">Haibo Mi</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Boqing Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dezhi Wang</a>, <a
href="/search/cs?searchtype=author&query=Zhang%2C+L">Lilun Zhang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+H">Hengxing Cai</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuwen Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.07319v1-abstract-short" style="display: inline;"> Audio scene classification, the problem of predicting class labels of audio scenes, has drawn lots of attention during the last several years. However, it remains challenging and falls short of accuracy and efficiency. Recently, Convolutional Neural Network (CNN)-based methods have achieved better performance with comparison to the traditional methods. Nevertheless, conventional single channel CNN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.07319v1-abstract-full').style.display = 'inline'; document.getElementById('1805.07319v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.07319v1-abstract-full" style="display: none;"> Audio scene classification, the problem of predicting class labels of audio scenes, has drawn lots of attention during the last several years. However, it remains challenging and falls short of accuracy and efficiency. Recently, Convolutional Neural Network (CNN)-based methods have achieved better performance with comparison to the traditional methods. Nevertheless, conventional single channel CNN may fail to consider the fact that additional cues may be embedded in the multi-channel recordings. In this paper, we explore the use of Multi-channel CNN for the classification task, which aims to extract features from different channels in an end-to-end manner. We conduct the evaluation compared with the conventional CNN and traditional Gaussian Mixture Model-based methods. 
Moreover, to improve the classification accuracy further, this paper explores the use of the mixup method. In brief, mixup trains the neural network on linear combinations of pairs of the representation of audio scene examples and their labels. By employing the mixup approach for data augmentation, the novel model can provide higher prediction accuracy and robustness in contrast with previous models, while the generalization error can also be reduced on the evaluation data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.07319v1-abstract-full').style.display = 'none'; document.getElementById('1805.07319v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Mi%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Mi%2C+H&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Mi%2C+H&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a
href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon 
filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 
0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository