
Search | arXiv e-print repository

Showing 1&ndash;50 of 1,879 results for author: Han, S

1. arXiv:2411.12776 [pdf, other] - eess.IV (Image and Video Processing); cs.CR (Cryptography and Security); cs.MM (Multimedia)
Title: Cross-Layer Encrypted Semantic Communication Framework for Panoramic Video Transmission
Authors: Haixiao Gao, Mengying Sun, Xiaodong Xu, Bingxuan Xu, Shujun Han, Bizhu Wang, Sheng Jiang, Chen Dong, Ping Zhang
Abstract: In this paper, we propose a cross-layer encrypted semantic communication (CLESC) framework for panoramic video transmission, incorporating feature extraction, encoding, encryption, cyclic redundancy check (CRC), and retransmission processes to achieve compatibility between semantic communication and traditional communication systems. Additionally, we propose an adaptive cross-layer transmission mechanism that dynamically adjusts CRC, channel coding, and retransmission schemes based on the importance of semantic information. This ensures that important information is prioritized under poor transmission conditions. To verify the aforementioned framework, we also design an end-to-end adaptive panoramic video semantic transmission (APVST) network that leverages a deep joint source-channel coding (Deep JSCC) structure and attention mechanism, integrated with a latitude adaptive module that facilitates adaptive semantic feature extraction and variable-length encoding of panoramic videos. The proposed CLESC is also applicable to the transmission of other modal data. Simulation results demonstrate that the proposed CLESC effectively achieves compatibility and adaptation between semantic communication and traditional communication systems, improving both transmission efficiency and channel adaptability. Compared to traditional cross-layer transmission schemes, the CLESC framework can reduce bandwidth consumption by 85% while showing significant advantages under low signal-to-noise ratio (SNR) conditions.
Submitted 19 November, 2024; originally announced November 2024.
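
The adaptive cross-layer mechanism summarized above maps the importance of each semantic feature to its CRC length, channel-code rate, and retransmission budget. A minimal sketch of that idea follows; the thresholds, field names, and concrete values are hypothetical illustrations, not parameters from the paper.

```python
# Illustrative sketch of importance-driven protection selection (hypothetical
# thresholds and scheme parameters; not the CLESC paper's actual configuration).

def select_protection(importance: float) -> dict:
    """Map a semantic-importance score in [0, 1] to a per-packet protection profile."""
    if importance > 0.8:            # critical semantic features
        return {"crc_bits": 32, "code_rate": 1 / 3, "max_retx": 4}
    if importance > 0.4:            # moderately important features
        return {"crc_bits": 16, "code_rate": 1 / 2, "max_retx": 2}
    return {"crc_bits": 8, "code_rate": 3 / 4, "max_retx": 0}  # low-priority features


if __name__ == "__main__":
    for score in (0.95, 0.5, 0.1):
        print(score, select_protection(score))
```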

2. arXiv:2411.11283 [pdf, other] - cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Title: Multi-Hyperbolic Space-based Heterogeneous Graph Attention Network
Authors: Jongmin Park, Seunghoon Han, Jong-Ryul Lee, Sungsu Lim
Abstract: To leverage the complex structures within heterogeneous graphs, recent studies on heterogeneous graph embedding use a hyperbolic space, characterized by a constant negative curvature and exponentially increasing space, which aligns with the structural properties of heterogeneous graphs. However, despite heterogeneous graphs inherently possessing diverse power-law structures, most hyperbolic heterogeneous graph embedding models use a single hyperbolic space for the entire heterogeneous graph, which may not effectively capture the diverse power-law structures within the heterogeneous graph. To address this limitation, we propose Multi-hyperbolic Space-based heterogeneous Graph Attention Network (MSGAT), which uses multiple hyperbolic spaces to effectively capture diverse power-law structures within heterogeneous graphs. We conduct comprehensive experiments to evaluate the effectiveness of MSGAT. The experimental results demonstrate that MSGAT outperforms state-of-the-art baselines in various graph machine learning tasks, effectively capturing the complex structures of heterogeneous graphs.
Submitted 17 November, 2024; originally announced November 2024.
Comments: Accepted in IEEE ICDM 2024
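
The core idea, using several hyperbolic spaces with different curvatures so that parts of the graph with different power-law behavior each get a suitable geometry, can be sketched as projecting the same Euclidean feature into several Poincare balls. The curvature values and the omission of any attention or aggregation step are illustrative simplifications, not MSGAT's actual architecture.

```python
# Minimal sketch of embedding one feature vector into several Poincare balls with
# different curvatures (one way to realize "multiple hyperbolic spaces"; values
# and the lack of attention/aggregation are illustrative, not MSGAT's design).
import numpy as np

def expmap0(v: np.ndarray, c: float) -> np.ndarray:
    """Exponential map at the origin of a Poincare ball with curvature -c (c > 0)."""
    norm = np.linalg.norm(v) + 1e-12
    return np.tanh(np.sqrt(c) * norm) * v / (np.sqrt(c) * norm)

def multi_space_embed(v: np.ndarray, curvatures=(0.5, 1.0, 2.0)):
    """Project one Euclidean feature into several hyperbolic spaces."""
    return [expmap0(v, c) for c in curvatures]

if __name__ == "__main__":
    x = np.array([0.3, -1.2, 0.7])
    for c, h in zip((0.5, 1.0, 2.0), multi_space_embed(x)):
        # each embedding lies inside its ball of radius 1/sqrt(c)
        print(f"c={c}: ||h|| = {np.linalg.norm(h):.3f} (radius {1/np.sqrt(c):.3f})")
```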

3. arXiv:2411.09906 [pdf, other] - cs.CR (Cryptography and Security); eess.SY (Systems and Control)
Title: A Survey of Machine Learning-based Physical-Layer Authentication in Wireless Communications
Authors: Rui Meng, Bingxuan Xu, Xiaodong Xu, Mengying Sun, Bizhu Wanga, Shujun Han, Suyu Lv, Ping Zhang
Abstract: To ensure secure and reliable communication in wireless systems, authenticating the identities of numerous nodes is imperative. Traditional cryptography-based authentication methods suffer from issues such as low compatibility, reliability, and high complexity. Physical-Layer Authentication (PLA) is emerging as a promising complement due to its exploitation of unique properties in wireless environments. Recently, Machine Learning (ML)-based PLA has gained attention for its intelligence, adaptability, universality, and scalability compared to non-ML approaches. However, a comprehensive overview of state-of-the-art ML-based PLA and its foundational aspects is lacking. This paper presents a comprehensive survey of characteristics and technologies that can be used in the ML-based PLA. We categorize existing ML-based PLA schemes into two main types: multi-device identification and attack detection schemes. In deep learning-based multi-device identification schemes, Deep Neural Networks are employed to train models, avoiding complex processing and expert feature transformation. Deep learning-based multi-device identification schemes are further subdivided, with schemes based on Convolutional Neural Networks being extensively researched. In ML-based attack detection schemes, receivers utilize intelligent ML techniques to set detection thresholds automatically, eliminating the need for manual calculation or knowledge of channel models. ML-based attack detection schemes are categorized into three sub-types: Supervised Learning, Unsupervised Learning, and Reinforcement Learning. Additionally, we summarize open-source datasets used for PLA, encompassing Radio Frequency fingerprints and channel fingerprints. Finally, this paper outlines future research directions to guide researchers in related fields.
Submitted 14 November, 2024; originally announced November 2024.
Comments: 111 pages, 9 figures

4. arXiv:2411.06766 [pdf, other] - cs.RO (Robotics); doi: 10.1109/LRA.2024.3498779
Title: GenZ-ICP: Generalizable and Degeneracy-Robust LiDAR Odometry Using an Adaptive Weighting
Authors: Daehan Lee, Hyungtae Lim, Soohee Han
Abstract: Light detection and ranging (LiDAR)-based odometry has been widely utilized for pose estimation due to its use of high-accuracy range measurements and immunity to ambient light conditions. However, the performance of LiDAR odometry varies depending on the environment and deteriorates in degenerative environments such as long corridors. This issue stems from the dependence on a single error metric, which has different strengths and weaknesses depending on the geometrical characteristics of the surroundings. To address these problems, this study proposes a novel iterative closest point (ICP) method called GenZ-ICP. We revisited both point-to-plane and point-to-point error metrics and propose a method that leverages their strengths in a complementary manner. Moreover, adaptability to diverse environments was enhanced by utilizing an adaptive weight that is adjusted based on the geometrical characteristics of the surroundings. As demonstrated in our experimental evaluation, the proposed GenZ-ICP exhibits high adaptability to various environments and resilience to optimization degradation in corridor-like degenerative scenarios by preventing ill-posed problems during the optimization process.
Submitted 11 November, 2024; originally announced November 2024.
Comments: 8 pages, 5 figures, Accepted to IEEE Robotics and Automation Letters (RA-L)
Journal ref: 2024 IEEE Robotics and Automation Letters (RA-L)
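
The complementary use of point-to-point and point-to-plane residuals with an adaptive, geometry-dependent weight can be illustrated with a small sketch. The planarity-based weight below is a hypothetical stand-in for GenZ-ICP's actual weighting rule, and the residual blend is shown for a single correspondence only.

```python
# Sketch of blending point-to-point and point-to-plane residuals with an adaptive
# weight derived from local geometry (illustrative, not GenZ-ICP's exact formulation).
import numpy as np

def blended_residual(p_src: np.ndarray, p_tgt: np.ndarray, n_tgt: np.ndarray,
                     alpha: float) -> float:
    """Squared residual mixing point-to-plane (weight alpha) and point-to-point."""
    d = p_src - p_tgt
    r_plane = float(np.dot(n_tgt, d)) ** 2          # distance along the target normal
    r_point = float(np.dot(d, d))                   # full Euclidean distance squared
    return alpha * r_plane + (1.0 - alpha) * r_point

def planarity_weight(eigvals: np.ndarray) -> float:
    """Map sorted eigenvalues of a local covariance (l1 >= l2 >= l3) to [0, 1]:
    close to 1 on planar patches, close to 0 on unstructured or degenerate ones."""
    l1, l2, l3 = eigvals
    return float((l2 - l3) / (l1 + 1e-12))

if __name__ == "__main__":
    src = np.array([1.0, 0.2, 0.05])
    tgt = np.array([1.0, 0.2, 0.00])
    n = np.array([0.0, 0.0, 1.0])
    a = planarity_weight(np.array([2.0, 1.8, 0.01]))   # strongly planar neighborhood
    print(a, blended_residual(src, tgt, n, a))
```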

5. arXiv:2411.05910 [pdf, other] - astro-ph.GA (Astrophysics of Galaxies)
Title: Exploring lenticular galaxy formation in field environments using NewHorizon: evidence for counter-rotating gas accretion as a formation channel
Authors: Seongbong Han, J. K. Jang, Emanuele Contini, Yohan Dubois, Seyoung Jeon, Sugata Kaviraj, Taysun Kimm, Katarina Kraljic, Sree Oh, Sebastien Peirani, Christophe Pichon, Sukyoung K. Yi
Abstract: The formation pathways of lenticular galaxies (S0s) in field environments remain a matter of debate. We utilize the cosmological hydrodynamic simulation, NewHorizon, to investigate the issue. We select two massive star-formation quenched S0s as our main sample. By closely tracing their physical and morphological evolution, we identify two primary formation channels: mergers and counter-rotating gas accretion. The former induces central gas inflow due to gravitational and hydrodynamic torques, triggering active central star formation which quickly depletes the gas of the galaxy. Counter-rotating gas accretion overall has a similar outcome but more exclusively through hydrodynamic collisions between the pre-existing and newly-accreted gas. Both channels lead to S0 morphology, with gas angular momentum cancellation being a crucial mechanism. These formation pathways quench star formation on a short timescale (< Gyr) compared to the timescales of environmental effects. We also discuss how counter-rotating gas accretion may explain the origin of S0s with ongoing star formation and the frequently observed gas-star misaligned kinematics in S0s.
Submitted 8 November, 2024; originally announced November 2024.
Comments: 19 pages, 16 figures, Accepted for publication in ApJ, Oct 27 2024

6. arXiv:2411.05281 [pdf, other] - cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: Fox-1 Technical Report
Authors: Zijian Hu, Jipeng Zhang, Rui Pan, Zhaozhuo Xu, Shanshan Han, Han Jin, Alay Dilipbhai Shah, Dimitris Stripelis, Yuhang Yao, Salman Avestimehr, Chaoyang He, Tong Zhang
Abstract: We present Fox-1, a series of small language models (SLMs) consisting of Fox-1-1.6B and Fox-1-1.6B-Instruct-v0.1. These models are pre-trained on 3 trillion tokens of web-scraped document data and fine-tuned with 5 billion tokens of instruction-following and multi-turn conversation data. Aiming to improve the pre-training efficiency, Fox-1-1.6B model introduces a novel 3-stage data curriculum across all the training data with 2K-8K sequence length. In architecture design, Fox-1 features a deeper layer structure, an expanded vocabulary, and utilizes Grouped Query Attention (GQA), offering a performant and efficient architecture compared to other SLMs. Fox-1 achieves better or on-par performance in various benchmarks compared to StableLM-2-1.6B, Gemma-2B, Qwen1.5-1.8B, and OpenELM1.1B, with competitive inference speed and throughput. The model weights have been released under the Apache 2.0 license, where we aim to promote the democratization of LLMs and make them fully accessible to the whole open-source community.
Submitted 17 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
Comments: Base model is available at https://huggingface.co/tensoropera/Fox-1-1.6B and the instruction-tuned version is available at https://huggingface.co/tensoropera/Fox-1-1.6B-Instruct-v0.1
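
Grouped Query Attention (GQA), cited in the report as one of Fox-1's architectural choices, lets several query heads share a single key/value head, which shrinks the KV cache relative to full multi-head attention. A single-token sketch follows; the head counts and dimensions are made up for illustration and are not Fox-1's actual configuration.

```python
# Minimal sketch of Grouped Query Attention for a single query position:
# n_q query heads share n_kv key/value heads (n_q % n_kv == 0).
# Head counts and dimensions are illustrative, not Fox-1's actual sizes.
import numpy as np

def gqa_single_token(q, k, v, n_q=8, n_kv=2):
    """q: (n_q, d); k, v: (n_kv, seq, d). Returns (n_q, d) attention outputs."""
    group = n_q // n_kv                      # query heads per shared K/V head
    d = q.shape[-1]
    outs = []
    for h in range(n_q):
        kv = h // group                      # which K/V head this query head uses
        scores = k[kv] @ q[h] / np.sqrt(d)   # (seq,)
        w = np.exp(scores - scores.max())
        w /= w.sum()                         # softmax over the sequence
        outs.append(w @ v[kv])               # weighted sum of values
    return np.stack(outs)

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    out = gqa_single_token(rng.normal(size=(8, 16)),
                           rng.normal(size=(2, 10, 16)),
                           rng.normal(size=(2, 10, 16)))
    print(out.shape)  # (8, 16)
```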

7. arXiv:2411.05209 [pdf, other] - cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Title: Alopex: A Computational Framework for Enabling On-Device Function Calls with LLMs
Authors: Yide Ran, Zhaozhuo Xu, Yuhang Yao, Zijian Hu, Shanshan Han, Han Jin, Alay Dilipbhai Shah, Jipeng Zhang, Dimitris Stripelis, Tong Zhang, Salman Avestimehr, Chaoyang He
Abstract: The rapid advancement of Large Language Models (LLMs) has led to their increased integration into mobile devices for personalized assistance, which enables LLMs to call external API functions to enhance their performance. However, challenges such as data scarcity, ineffective question formatting, and catastrophic forgetting hinder the development of on-device LLM agents. To tackle these issues, we propose Alopex, a framework that enables precise on-device function calls using the Fox LLM. Alopex introduces a logic-based method for generating high-quality training data and a novel "description-question-output" format for fine-tuning, reducing risks of function information leakage. Additionally, a data mixing strategy is used to mitigate catastrophic forgetting, combining function call data with textbook datasets to enhance performance in various tasks. Experimental results show that Alopex improves function call accuracy and significantly reduces catastrophic forgetting, providing a robust solution for integrating function call capabilities into LLMs without manual intervention.
Submitted 7 November, 2024; originally announced November 2024.

8. arXiv:2411.05007 [pdf, other] - cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
Title: SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models
Authors: Muyang Li, Yujun Lin, Zhekai Zhang, Tianle Cai, Xiuyu Li, Junxian Guo, Enze Xie, Chenlin Meng, Jun-Yan Zhu, Song Han
Abstract: Diffusion models have been proven highly effective at generating high-quality images. However, as these models grow larger, they require significantly more memory and suffer from higher latency, posing substantial challenges for deployment. In this work, we aim to accelerate diffusion models by quantizing their weights and activations to 4 bits. At such an aggressive level, both weights and activations are highly sensitive, where conventional post-training quantization methods for large language models like smoothing become insufficient. To overcome this limitation, we propose SVDQuant, a new 4-bit quantization paradigm. Different from smoothing which redistributes outliers between weights and activations, our approach absorbs these outliers using a low-rank branch. We first consolidate the outliers by shifting them from activations to weights, then employ a high-precision low-rank branch to take in the weight outliers with Singular Value Decomposition (SVD). This process eases the quantization on both sides. However, naively running the low-rank branch independently incurs significant overhead due to extra data movement of activations, negating the quantization speedup. To address this, we co-design an inference engine Nunchaku that fuses the kernels of the low-rank branch into those of the low-bit branch to cut off redundant memory access. It can also seamlessly support off-the-shelf low-rank adapters (LoRAs) without the need for re-quantization. Extensive experiments on SDXL, PixArt-$\Sigma$, and FLUX.1 validate the effectiveness of SVDQuant in preserving image quality. We reduce the memory usage for the 12B FLUX.1 models by 3.5$\times$, achieving 3.0$\times$ speedup over the 4-bit weight-only quantized baseline on the 16GB laptop 4090 GPU, paving the way for more interactive applications on PCs. Our quantization library and inference engine are open-sourced.
Submitted 8 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
Comments: Quantization Library: https://github.com/mit-han-lab/deepcompressor Inference Engine: https://github.com/mit-han-lab/nunchaku Website: https://hanlab.mit.edu/projects/svdquant Demo: https://svdquant.mit.edu Blog: https://hanlab.mit.edu/blog/svdquant
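
The decomposition at the heart of this abstract, keeping a high-precision low-rank branch obtained by SVD and quantizing only the residual to 4 bits, can be sketched in a few lines. The sketch omits the preceding activation-to-weight outlier shifting and the Nunchaku kernel fusion, and the rank and the symmetric per-tensor quantizer are arbitrary illustrative choices.

```python
# Sketch of an SVDQuant-style split: W ~= L (high-precision low-rank) + Q(W - L) (4-bit).
# Rank, quantizer, and sizes are illustrative; this is not the paper's implementation.
import numpy as np

def quantize_4bit(x: np.ndarray) -> np.ndarray:
    """Symmetric per-tensor 4-bit fake-quantization (returns dequantized values)."""
    scale = np.abs(x).max() / 7.0 + 1e-12      # int4 symmetric range: [-7, 7]
    return np.clip(np.round(x / scale), -7, 7) * scale

def svdquant_like(W: np.ndarray, rank: int = 16):
    """Split W into a high-precision rank-`rank` branch plus a 4-bit residual."""
    U, S, Vt = np.linalg.svd(W, full_matrices=False)
    L = (U[:, :rank] * S[:rank]) @ Vt[:rank]   # low-rank branch absorbs the largest components
    R_q = quantize_4bit(W - L)                 # residual is easier to quantize
    return L, R_q

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    W = rng.normal(size=(256, 256))
    W[0, 0] = 50.0                             # an artificial outlier
    L, R_q = svdquant_like(W)
    err_plain = np.abs(W - quantize_4bit(W)).mean()
    err_svd = np.abs(W - (L + R_q)).mean()
    print(f"plain 4-bit error {err_plain:.4f}  vs  low-rank + 4-bit error {err_svd:.4f}")
```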

9. arXiv:2411.04833 [pdf, other] - eess.SY (Systems and Control)
Title: Finding Control Invariant Sets via Lipschitz Constants of Linear Programs
Authors: Matti Vahs, Shaohang Han, Jana Tumova
Abstract: Control invariant sets play an important role in safety-critical control and find broad application in numerous fields such as obstacle avoidance for mobile robots. However, finding valid control invariant sets of dynamical systems under input limitations is notoriously difficult. We present an approach to safely expand an initial set while always guaranteeing that the set is control invariant. Specifically, we define an expansion law for the boundary of a set and check for control invariance using Linear Programs (LPs). To verify control invariance on a continuous domain, we leverage recently proposed Lipschitz constants of LPs to transform the problem of continuous verification into a finite number of LPs. Using concepts from differentiable optimization, we derive the safe expansion law of the control invariant set and show how it can be interpreted as a second invariance problem in the space of possible boundaries. Finally, we show how the obtained set can be used to obtain a minimally invasive safety filter in a Control Barrier Function (CBF) framework. Our work is supported by theoretical results as well as numerical examples.
Submitted 7 November, 2024; originally announced November 2024.
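
The per-point invariance test the abstract alludes to can be illustrated with a single LP: at a boundary point with outward normal n, some admissible input must make n.(f(x)+g(x)u) non-positive so the dynamics point into the set. The dynamics, input box, and sample point below are hypothetical, and the Lipschitz-constant argument that extends finitely many such checks to the whole continuous boundary is not reproduced.

```python
# Sketch of a per-boundary-point LP check for control invariance of a set under
# control-affine dynamics x_dot = f(x) + g(x) u with box-bounded inputs.
# The example system and set are hypothetical illustrations.
import numpy as np
from scipy.optimize import linprog

def invariance_lp(n: np.ndarray, f: np.ndarray, g: np.ndarray,
                  u_min: np.ndarray, u_max: np.ndarray) -> bool:
    """Solve min_u n^T (f + g u) over the input box and test non-positivity."""
    res = linprog(c=g.T @ n, bounds=list(zip(u_min, u_max)), method="highs")
    return bool(res.success and (n @ f + res.fun) <= 0.0)

if __name__ == "__main__":
    # Single integrator x_dot = u, unit-box inputs, boundary point of the unit disk.
    x = np.array([1.0, 0.0])
    n = x / np.linalg.norm(x)                 # outward normal of the unit disk at x
    f = np.zeros(2)
    g = np.eye(2)
    print(invariance_lp(n, f, g, -np.ones(2), np.ones(2)))   # True: u = -n points inward
```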

10. arXiv:2411.03640 [pdf] - quant-ph (Quantum Physics); physics.optics (Optics); doi: 10.1126/sciadv.adl4871
Title: Efficient learning of mixed-state tomography for photonic quantum walk
Authors: Qin-Qin Wang, Shaojun Dong, Xiao-Wei Li, Xiao-Ye Xu, Chao Wang, Shuai Han, Man-Hong Yung, Yong-Jian Han, Chuan-Feng Li, Guang-Can Guo
Abstract: Noise-enhanced applications in open quantum walk (QW) have recently seen a surge due to their ability to improve performance. However, verifying the success of open QW is challenging, as mixed-state tomography is a resource-intensive process, and implementing all required measurements is almost impossible due to various physical constraints. To address this challenge, we present a neural-network-based method for reconstructing mixed states with a high fidelity (~97.5%) while costing only 50% of the number of measurements typically required for open discrete-time QW in one dimension. Our method uses a neural density operator that models the system and environment, followed by a generalized natural gradient descent procedure that significantly speeds up the training process. Moreover, we introduce a compact interferometric measurement device, improving the scalability of our photonic QW setup that enables experimental learning of mixed states. Our results demonstrate that highly expressive neural networks can serve as powerful alternatives to traditional state tomography.
Submitted 5 November, 2024; originally announced November 2024.
Journal ref: Sci. Adv. 10, eadl4871 (2024)
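
A neural density operator has to output a physically valid mixed state; one standard way to guarantee that is the purification-style parameterization rho = A A^dagger / Tr(A A^dagger), which is Hermitian, positive semidefinite, and unit-trace by construction. The sketch below shows only that generic parameterization, not the paper's network architecture or its generalized natural gradient descent.

```python
# Minimal sketch of a physically valid density-matrix parameterization of the kind a
# neural density operator can build on. Generic illustration, not the paper's model.
import numpy as np

def density_from_params(A: np.ndarray) -> np.ndarray:
    """Map an unconstrained complex matrix A to a valid density matrix."""
    rho = A @ A.conj().T                      # Hermitian and positive semidefinite
    return rho / np.trace(rho).real           # normalize to unit trace

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    A = rng.normal(size=(4, 4)) + 1j * rng.normal(size=(4, 4))
    rho = density_from_params(A)
    eigs = np.linalg.eigvalsh(rho)
    print(np.isclose(np.trace(rho).real, 1.0), bool((eigs >= -1e-12).all()))  # True True
```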
The research also elucidates how hallucinations arise in image prediction, reveals their cause, and clarifies the common relationship between generalization and hallucination. More broadly, it enhances the interpretability of deep learning from a physics-based perspective, and builds a universal physical framework for deep learning in various fields. It may pave the way for direct interaction between deep learning and the real world, facilitating the transition of deep learning from a demo model to a practical tool in diverse applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02893v1-abstract-full').style.display = 'none'; document.getElementById('2411.02893v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02722">arXiv:2411.02722</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02722">pdf</a>, <a href="https://arxiv.org/format/2411.02722">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Commonsense Knowledge Distillation for Visual Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Yang%2C+S">Shuo Yang</a>, <a href="/search/?searchtype=author&amp;query=Luo%2C+S">Siwen Luo</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S+C">Soyeon Caren Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02722v1-abstract-short" style="display: inline;"> Existing Multimodal Large Language Models (MLLMs) and Visual Language Pretrained Models (VLPMs) have shown remarkable performance in general Visual Question Answering (VQA). However, these models struggle with VQA questions that require external commonsense knowledge due to the challenges in generating high-quality prompts and the high computational costs of fine-tuning. In this work, we prop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02722v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02722v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02722v1-abstract-full" style="display: none;"> Existing Multimodal Large Language Models (MLLMs) and Visual Language Pretrained Models (VLPMs) have shown remarkable performance in general Visual Question Answering (VQA). However, these models struggle with VQA questions that require external commonsense knowledge due to the challenges in generating high-quality prompts and the high computational costs of fine-tuning.
In this work, we propose a novel graph-based multimodal commonsense knowledge distillation framework that constructs a unified relational graph over commonsense knowledge, visual objects and questions through a Graph Convolutional Network (GCN) following a teacher-student environment. This proposed framework is flexible with any type of teacher and student models without further fine-tuning, and has achieved competitive performances on the ScienceQA dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02722v1-abstract-full').style.display = 'none'; document.getElementById('2411.02722v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2025 (Accepted, Oral)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01839">arXiv:2411.01839</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01839">pdf</a>, <a href="https://arxiv.org/format/2411.01839">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TriG-NER: Triplet-Grid Framework for Discontinuous Named Entity Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cabral%2C+R+C">Rina Carines Cabral</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S+C">Soyeon Caren Han</a>, <a href="/search/?searchtype=author&amp;query=Alhassan%2C+A">Areej Alhassan</a>, <a href="/search/?searchtype=author&amp;query=Batista-Navarro%2C+R">Riza Batista-Navarro</a>, <a href="/search/?searchtype=author&amp;query=Nenadic%2C+G">Goran Nenadic</a>, <a href="/search/?searchtype=author&amp;query=Poon%2C+J">Josiah Poon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01839v1-abstract-short" style="display: inline;"> Discontinuous Named Entity Recognition (DNER) presents a challenging problem where entities may be scattered across multiple non-adjacent tokens, making traditional sequence labelling approaches inadequate. Existing methods predominantly rely on custom tagging schemes to handle these discontinuous entities, resulting in models tightly coupled to specific tagging strategies and lacking generalisabi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01839v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01839v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01839v1-abstract-full" style="display: none;"> Discontinuous Named Entity Recognition (DNER) presents a challenging problem where entities may be scattered across multiple non-adjacent tokens, making traditional sequence labelling approaches inadequate. 
Existing methods predominantly rely on custom tagging schemes to handle these discontinuous entities, resulting in models tightly coupled to specific tagging strategies and lacking generalisability across diverse datasets. To address these challenges, we propose TriG-NER, a novel Triplet-Grid Framework that introduces a generalisable approach to learning robust token-level representations for discontinuous entity extraction. Our framework applies triplet loss at the token level, where similarity is defined by word pairs existing within the same entity, effectively pulling together similar and pushing apart dissimilar ones. This approach enhances entity boundary detection and reduces the dependency on specific tagging schemes by focusing on word-pair relationships within a flexible grid structure. We evaluate TriG-NER on three benchmark DNER datasets and demonstrate significant improvements over existing grid-based architectures. These results underscore our framework&#39;s effectiveness in capturing complex entity structures and its adaptability to various tagging schemes, setting a new benchmark for discontinuous entity extraction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01839v1-abstract-full').style.display = 'none'; document.getElementById('2411.01839v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code will be made available upon publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23565">arXiv:2410.23565</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.23565">pdf</a>, <a href="https://arxiv.org/ps/2410.23565">ps</a>, <a href="https://arxiv.org/format/2410.23565">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="General Topology">math.GN</span> </div> </div> <p class="title is-5 mathjax"> Remarks on the digital-topological $k$-group structures and the development of the $AP_1$-$k$- and $AP_1^\ast$-$k$-group </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Han%2C+S">Sang-Eon Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23565v1-abstract-short" style="display: inline;"> In the literature of a digital-topological ($DT$-, for brevity) group structure on a digital image $(X,k)$, roughly saying, two kinds of methods are shown. 
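<p class="is-size-7">A minimal sketch of the token-level triplet objective described in the TriG-NER entry above, assuming token embeddings are already available from some encoder; the margin, tensor shapes, and function name are illustrative assumptions rather than the framework itself.</p>
<pre>
import torch
import torch.nn.functional as F

def token_triplet_loss(anchor, positive, negative, margin=1.0):
    """Pull an anchor token toward a token from the same entity (positive)
    and push it away from an unrelated token (negative)."""
    d_pos = F.pairwise_distance(anchor, positive)   # distance to same-entity token
    d_neg = F.pairwise_distance(anchor, negative)   # distance to non-entity token
    return torch.clamp(d_pos - d_neg + margin, min=0).mean()

# Illustrative usage with random 128-dimensional token embeddings for 16 triplets.
anchor, positive, negative = (torch.randn(16, 128) for _ in range(3))
loss = token_triplet_loss(anchor, positive, negative)   # scalar training loss
</pre>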
Given a digital image $(X,k)$, the first one, named by a $DT$-$k$-group, was established in 2022 \cite{H10} by using both the $G_{k^\ast}$- or $C_{k^\ast}$-adjacency \cite{H10} for the product $X^2:=X \times X$ and the $(G_{k^\ast},k)$- or&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23565v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23565v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23565v1-abstract-full" style="display: none;"> In the literature of a digital-topological ($DT$-, for brevity) group structure on a digital image $(X,k)$, roughly saying, two kinds of methods are shown. Given a digital image $(X,k)$, the first one, named by a $DT$-$k$-group, was established in 2022 \cite{H10} by using both the $G_{k^\ast}$- or $C_{k^\ast}$-adjacency \cite{H10} for the product $X^2:=X \times X$ and the $(G_{k^\ast},k)$- or $(C_{k^\ast},k)$-continuity for the multiplication $\alpha:X^2 \to X$ \cite{H10}. The second one with the name of $NP_i$-$DT$-groups, $i \in \{1,2\}$, was discussed in 2023 \cite{LS1} by using the $NP_i(k,k)$-adjacency for $X^2$ in \cite{B1} and the $(NP_i(k,k), k)$-continuities of the multiplication $\alpha:X^2 \to X$, $i\in \{1,2\}$. However, due to some defects of the $NP_u(k_1,k_2, \cdots, k_v)$-adjacency in \cite{B1,B2}, the $AP_u(k_1,k_2, \cdots, k_v)$-adjacency was recently developed as an alternative to the $NP_u(k_1,k_2, \cdots, k_v)$-adjacency (see Section 4). Besides, we also develop an $AP_u^\ast(k_1,k_2, \cdots, k_v)$-adjacency. For a digital image $(X, k)$, in case an $AP_1(k,k)$-($AP_1$-, for simplicity) adjacency on $X^2$ exists, we formulate both an $AP_1$-$k$- and an $AP_1^\ast$-$k$-group. Then we show that an $AP_1^\ast$-$k$-group is equivalent to a Han&#39;s $DT$-$k$-group based on both the $C_{k^\ast}$-adjacency on the product $X^2$ and the $(C_{k^\ast}, k)$-continuity for the multiplication $\alpha_1^\prime:(X^2, C_{k^\ast}) \to (X,k)$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23565v1-abstract-full').style.display = 'none'; document.getElementById('2410.23565v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper can play an important role in the fields of general topology, digital topology, digital geometry, and so on</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 22A05; 22A10; 54C08; 54H11; 68U10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19313">arXiv:2410.19313</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19313">pdf</a>, <a href="https://arxiv.org/format/2410.19313">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> COAT: Compressing Optimizer states and Activation for Memory-Efficient FP8 Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Xi%2C+H">Haocheng Xi</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+H">Han Cai</a>, <a href="/search/?searchtype=author&amp;query=Zhu%2C+L">Ligeng Zhu</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+Y">Yao Lu</a>, <a href="/search/?searchtype=author&amp;query=Keutzer%2C+K">Kurt Keutzer</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Jianfei Chen</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Song Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19313v1-abstract-short" style="display: inline;"> FP8 training has emerged as a promising method for improving training efficiency. Existing frameworks accelerate training by applying FP8 computation to linear layers while leaving optimizer states and activations in higher precision, which fails to fully optimize memory usage. This paper introduces COAT (Compressing Optimizer States and Activations for FP8 Training), a novel FP8 training framewor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19313v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19313v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19313v1-abstract-full" style="display: none;"> FP8 training has emerged as a promising method for improving training efficiency. Existing frameworks accelerate training by applying FP8 computation to linear layers while leaving optimizer states and activations in higher precision, which fails to fully optimize memory usage. This paper introduces COAT (Compressing Optimizer States and Activations for FP8 Training), a novel FP8 training framework designed to significantly reduce memory footprint when training large models. COAT addresses current limitations through two key innovations: (1) Dynamic Range Expansion, which aligns optimizer state distributions more closely with the FP8 representation range, thereby reducing quantization error, and (2) Mixed-Granularity Activation Quantization, which optimizes activation memory using a combination of per-tensor and per-group quantization strategies. 
Experiments demonstrate that COAT effectively reduces end-to-end training memory footprint by 1.54x compared to BF16 while achieving nearly lossless performance across various tasks, such as Large Language Model pretraining and fine-tuning and Vision Language Model training. COAT also achieves a 1.43x end-to-end training speedup compared to BF16, performing on par with or surpassing TransformerEngine&#39;s speedup. COAT enables efficient full-parameter training of large models on fewer GPUs, and facilitates doubling the batch size in distributed training settings, providing a practical solution for scaling large-scale model training. The code is available at https://github.com/NVlabs/COAT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19313v1-abstract-full').style.display = 'none'; document.getElementById('2410.19313v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages. 9 Figures. 8 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19211">arXiv:2410.19211</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19211">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting Liquidity Coverage Ratio with Gated Recurrent Units: A Deep Learning Model for Risk Management </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Xu%2C+Z">Zhen Xu</a>, <a href="/search/?searchtype=author&amp;query=Pan%2C+J">Jingming Pan</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Siyuan Han</a>, <a href="/search/?searchtype=author&amp;query=Ouyang%2C+H">Hongju Ouyang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+Y">Yuan Chen</a>, <a href="/search/?searchtype=author&amp;query=Jiang%2C+M">Mohan Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19211v1-abstract-short" style="display: inline;"> With the global economic integration and the high interconnection of financial markets, financial institutions are facing unprecedented challenges, especially liquidity risk. This paper proposes a liquidity coverage ratio (LCR) prediction model based on the gated recurrent unit (GRU) network to help financial institutions manage their liquidity risk more effectively. 
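<p class="is-size-7">A hedged sketch of the kind of GRU-based sequence regressor described in the LCR abstract above, written in PyTorch; the layer sizes, feature count, window length, and synthetic data are illustrative assumptions rather than the configuration used in the paper.</p>
<pre>
import torch
import torch.nn as nn

class LCRForecaster(nn.Module):
    """GRU over a window of historical indicators, predicting the next-period LCR."""
    def __init__(self, n_features=8, hidden=32):
        super().__init__()
        self.gru = nn.GRU(n_features, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 1)

    def forward(self, x):                  # x: (batch, time, n_features)
        _, h = self.gru(x)                 # h: final hidden state, (1, batch, hidden)
        return self.head(h[-1]).squeeze(-1)

model = LCRForecaster()
x = torch.randn(16, 30, 8)                 # 16 institutions, 30 time steps, 8 indicators (synthetic)
y = torch.randn(16)                        # next-period LCR targets (synthetic)
loss = nn.L1Loss()(model(x), y)            # mean absolute error, the metric cited in the abstract
loss.backward()
</pre>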
By utilizing the GRU network i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19211v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19211v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19211v1-abstract-full" style="display: none;"> With the global economic integration and the high interconnection of financial markets, financial institutions are facing unprecedented challenges, especially liquidity risk. This paper proposes a liquidity coverage ratio (LCR) prediction model based on the gated recurrent unit (GRU) network to help financial institutions manage their liquidity risk more effectively. By utilizing the GRU network in deep learning technology, the model can automatically learn complex patterns from historical data and accurately predict LCR for a period of time in the future. The experimental results show that compared with traditional methods, the GRU model proposed in this study shows significant advantages in mean absolute error (MAE), proving its higher accuracy and robustness. This not only provides financial institutions with a more reliable liquidity risk management tool but also provides support for regulators to formulate more scientific and reasonable policies, which helps to improve the stability of the entire financial system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19211v1-abstract-full').style.display = 'none'; document.getElementById('2410.19211v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18114">arXiv:2410.18114</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18114">pdf</a>, <a href="https://arxiv.org/format/2410.18114">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bridging Today and the Future of Humanity: AI Safety in 2024 and Beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shanshan Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18114v2-abstract-short" style="display: inline;"> The growing prevalence of generative AI inevitably raises concerns regarding the associated risks and safety implications, which catalyzes significant progress in AI safety. However, as this field thrives, a critical question emerges: Are our current efforts aligned with the broader perspective of human history and civilization? 
This paper presents a blueprint for an advanced human society and lev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18114v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18114v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18114v2-abstract-full" style="display: none;"> The growing prevalence of generative AI inevitably raises concerns regarding the associated risks and safety implications, which catalyzes significant progress in AI safety. However, as this field thrives, a critical question emerges: Are our current efforts aligned with the broader perspective of human history and civilization? This paper presents a blueprint for an advanced human society and leverages this vision to guide contemporary AI safety efforts. It outlines a future where the Internet of Everything becomes reality, and creates a roadmap of significant technological advancements towards this envisioned future. For each stage of the advancements, this paper forecasts potential AI safety issues that humanity may face. By projecting current efforts against this blueprint, we examine the alignment between the present efforts and the long-term needs. This paper identifies gaps in current approaches and highlights unique challenges and missions that demand increasing attention from AI safety practitioners in the 2020s, addressing critical areas that must not be overlooked in shaping a responsible future for AI development. This vision paper aims to offer a broader perspective on AI safety, emphasizing that our current efforts should not only address immediate concerns but also anticipate potential risks in the expanding AI landscape, thereby fostering AI&#39;s role in promoting a more secure and sustainable future for human civilization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18114v2-abstract-full').style.display = 'none'; document.getElementById('2410.18114v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18097">arXiv:2410.18097</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18097">pdf</a>, <a href="https://arxiv.org/format/2410.18097">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RRADistill: Distilling LLMs&#39; Passage Ranking Ability for Long-Tail Queries Document Re-Ranking on a Search Engine </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Choi%2C+N">Nayoung Choi</a>, <a href="/search/?searchtype=author&amp;query=Lee%2C+Y">Youngjune Lee</a>, <a href="/search/?searchtype=author&amp;query=Cho%2C+G">Gyu-Hwung Cho</a>, <a href="/search/?searchtype=author&amp;query=Jeong%2C+H">Haeyu Jeong</a>, <a href="/search/?searchtype=author&amp;query=Kong%2C+J">Jungmin Kong</a>, <a href="/search/?searchtype=author&amp;query=Kim%2C+S">Saehun Kim</a>, <a href="/search/?searchtype=author&amp;query=Park%2C+K">Keunchan Park</a>, <a href="/search/?searchtype=author&amp;query=Cho%2C+S">Sarah Cho</a>, <a href="/search/?searchtype=author&amp;query=Jeong%2C+I">Inchang Jeong</a>, <a href="/search/?searchtype=author&amp;query=Nam%2C+G">Gyohee Nam</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Sunghoon Han</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+W">Wonil Yang</a>, <a href="/search/?searchtype=author&amp;query=Choi%2C+J">Jaeho Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18097v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) excel at understanding the semantic relationships between queries and documents, even with lengthy and complex long-tail queries. These queries are challenging for feedback-based rankings due to sparse user engagement and limited feedback, making LLMs&#39; ranking ability highly valuable. However, the large size and slow inference of LLMs necessitate the development of sma&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18097v3-abstract-full').style.display = 'inline'; document.getElementById('2410.18097v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18097v3-abstract-full" style="display: none;"> Large Language Models (LLMs) excel at understanding the semantic relationships between queries and documents, even with lengthy and complex long-tail queries. These queries are challenging for feedback-based rankings due to sparse user engagement and limited feedback, making LLMs&#39; ranking ability highly valuable. However, the large size and slow inference of LLMs necessitate the development of smaller, more efficient models (sLLMs). Recently, integrating ranking label generation into distillation techniques has become crucial, but existing methods underutilize LLMs&#39; capabilities and are cumbersome. 
Our research, RRADistill: Re-Ranking Ability Distillation, proposes an efficient label generation pipeline and novel sLLM training methods for both encoder and decoder models. We introduce an encoder-based method using a Term Control Layer to capture term matching signals and a decoder-based model with a ranking layer for enhanced understanding. A/B testing on a Korean-based search platform validates the effectiveness of our approach in improving re-ranking for long-tail queries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18097v3-abstract-full').style.display = 'none'; document.getElementById('2410.18097v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024 Industry Track. First two authors contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17036">arXiv:2410.17036</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17036">pdf</a>, <a href="https://arxiv.org/format/2410.17036">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Dark Matter Search Results from 4.2 Tonne-Years of Exposure of the LUX-ZEPLIN (LZ) Experiment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Aalbers%2C+J">J. Aalbers</a>, <a href="/search/?searchtype=author&amp;query=Akerib%2C+D+S">D. S. Akerib</a>, <a href="/search/?searchtype=author&amp;query=Musalhi%2C+A+K+A">A. K. Al Musalhi</a>, <a href="/search/?searchtype=author&amp;query=Alder%2C+F">F. Alder</a>, <a href="/search/?searchtype=author&amp;query=Amarasinghe%2C+C+S">C. S. Amarasinghe</a>, <a href="/search/?searchtype=author&amp;query=Ames%2C+A">A. Ames</a>, <a href="/search/?searchtype=author&amp;query=Anderson%2C+T+J">T. J. Anderson</a>, <a href="/search/?searchtype=author&amp;query=Angelides%2C+N">N. Angelides</a>, <a href="/search/?searchtype=author&amp;query=Ara%C3%BAjo%2C+H+M">H. M. Araújo</a>, <a href="/search/?searchtype=author&amp;query=Armstrong%2C+J+E">J. E. Armstrong</a>, <a href="/search/?searchtype=author&amp;query=Arthurs%2C+M">M. Arthurs</a>, <a href="/search/?searchtype=author&amp;query=Baker%2C+A">A. Baker</a>, <a href="/search/?searchtype=author&amp;query=Balashov%2C+S">S. Balashov</a>, <a href="/search/?searchtype=author&amp;query=Bang%2C+J">J. Bang</a>, <a href="/search/?searchtype=author&amp;query=Bargemann%2C+J+W">J. W. Bargemann</a>, <a href="/search/?searchtype=author&amp;query=Barillier%2C+E+E">E. E. Barillier</a>, <a href="/search/?searchtype=author&amp;query=Bauer%2C+D">D. Bauer</a>, <a href="/search/?searchtype=author&amp;query=Beattie%2C+K">K. Beattie</a>, <a href="/search/?searchtype=author&amp;query=Benson%2C+T">T. Benson</a>, <a href="/search/?searchtype=author&amp;query=Bhatti%2C+A">A. 
Bhatti</a>, <a href="/search/?searchtype=author&amp;query=Biekert%2C+A">A. Biekert</a>, <a href="/search/?searchtype=author&amp;query=Biesiadzinski%2C+T+P">T. P. Biesiadzinski</a>, <a href="/search/?searchtype=author&amp;query=Birch%2C+H+J">H. J. Birch</a>, <a href="/search/?searchtype=author&amp;query=Bishop%2C+E">E. Bishop</a>, <a href="/search/?searchtype=author&amp;query=Blockinger%2C+G+M">G. M. Blockinger</a> , et al. (193 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17036v2-abstract-short" style="display: inline;"> We report results of a search for nuclear recoils induced by weakly interacting massive particle (WIMP) dark matter using the LUX-ZEPLIN (LZ) two-phase xenon time projection chamber. This analysis uses a total exposure of $4.2\pm0.1$ tonne-years from 280 live days of LZ operation, of which $3.3\pm0.1$ tonne-years and 220 live days are new. A technique to actively tag background electronic recoils&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17036v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17036v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17036v2-abstract-full" style="display: none;"> We report results of a search for nuclear recoils induced by weakly interacting massive particle (WIMP) dark matter using the LUX-ZEPLIN (LZ) two-phase xenon time projection chamber. This analysis uses a total exposure of $4.2\pm0.1$ tonne-years from 280 live days of LZ operation, of which $3.3\pm0.1$ tonne-years and 220 live days are new. A technique to actively tag background electronic recoils from $^{214}$Pb $\beta$ decays is featured for the first time. Enhanced electron-ion recombination is observed in two-neutrino double electron capture decays of $^{124}$Xe, representing a noteworthy new background. After removal of artificial signal-like events injected into the data set to mitigate analyzer bias, we find no evidence for an excess over expected backgrounds. World-leading constraints are placed on spin-independent (SI) and spin-dependent WIMP-nucleon cross sections for masses $\geq$9 GeV/$c^2$. The strongest SI exclusion set is $2.1\times10^{-48}$ cm$^{2}$ at the 90% confidence level at a mass of 36 GeV/$c^2$, and the best SI median sensitivity achieved is $5.0\times10^{-48}$ cm$^{2}$ for a mass of 40 GeV/$c^2$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17036v2-abstract-full').style.display = 'none'; document.getElementById('2410.17036v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures. 
See https://www.hepdata.net/record/155182 for a data release related to this paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16977">arXiv:2410.16977</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16977">pdf</a>, <a href="https://arxiv.org/format/2410.16977">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> IPL: Leveraging Multimodal Large Language Models for Intelligent Product Listing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+K">Kang Chen</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+Q">Qingheng Zhang</a>, <a href="/search/?searchtype=author&amp;query=Lian%2C+C">Chengbao Lian</a>, <a href="/search/?searchtype=author&amp;query=Ji%2C+Y">Yixin Ji</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+X">Xuwei Liu</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shuguang Han</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+G">Guoqiang Wu</a>, <a href="/search/?searchtype=author&amp;query=Huang%2C+F">Fei Huang</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Jufeng Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16977v1-abstract-short" style="display: inline;"> Unlike professional Business-to-Consumer (B2C) e-commerce platforms (e.g., Amazon), Consumer-to-Consumer (C2C) platforms (e.g., Facebook marketplace) are mainly targeting individual sellers who usually lack sufficient experience in e-commerce. Individual sellers often struggle to compose proper descriptions for selling products. With the recent advancement of Multimodal Large Language Models (MLLM&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16977v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16977v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16977v1-abstract-full" style="display: none;"> Unlike professional Business-to-Consumer (B2C) e-commerce platforms (e.g., Amazon), Consumer-to-Consumer (C2C) platforms (e.g., Facebook marketplace) are mainly targeting individual sellers who usually lack sufficient experience in e-commerce. Individual sellers often struggle to compose proper descriptions for selling products. With the recent advancement of Multimodal Large Language Models (MLLMs), we attempt to integrate such state-of-the-art generative AI technologies into the product listing process. To this end, we develop IPL, an Intelligent Product Listing tool tailored to generate descriptions using various product attributes such as category, brand, color, condition, etc. IPL enables users to compose product descriptions by merely uploading photos of the selling product. More importantly, it can imitate the content style of our C2C platform Xianyu. This is achieved by employing domain-specific instruction tuning on MLLMs and adopting the multi-modal Retrieval-Augmented Generation (RAG) process. 
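<p class="is-size-7">As a rough sketch of the retrieval step in a RAG-style pipeline like the one the IPL abstract above mentions, the snippet below embeds a query, retrieves the nearest style exemplars by cosine similarity, and prepends them to a prompt; the toy hashing embed function, the exemplar store, and the prompt format are all assumptions, not the production system.</p>
<pre>
import numpy as np

def embed(text, dim=64):
    """Toy stand-in for a real (multimodal) encoder, used only so the sketch runs."""
    rng = np.random.default_rng(abs(hash(text)) % (2**32))
    v = rng.normal(size=dim)
    return v / np.linalg.norm(v)

# Hypothetical store of platform-style description exemplars.
exemplars = [
    "Barely used mountain bike, small scratches on the frame, pick-up only.",
    "iPhone 12, 128GB, battery health 88%, comes with the original charger.",
    "Wooden bookshelf, solid pine, minor wear, must go this week.",
]
exemplar_vecs = np.stack([embed(t) for t in exemplars])

def build_prompt(product_attrs, k=2):
    """Retrieve the k most similar exemplars and prepend them to the generation prompt."""
    q = embed(product_attrs)
    scores = exemplar_vecs @ q                        # cosine similarity of unit vectors
    top = np.argsort(scores)[::-1][:k]                # indices of the k closest exemplars
    context = "\n".join(exemplars[i] for i in top)
    return f"Style examples:\n{context}\n\nWrite a listing for: {product_attrs}"

print(build_prompt("category: bicycle, brand: Giant, condition: used, color: red"))
</pre>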
A comprehensive empirical evaluation demonstrates that the underlying model of IPL significantly outperforms the base model in domain-specific tasks while producing less hallucination. IPL has been successfully deployed in our production system, where 72% of users have their published product listings based on the generated content, and those product listings are shown to have a quality score 5.6% higher than those without AI assistance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16977v1-abstract-full').style.display = 'none'; document.getElementById('2410.16977v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16462">arXiv:2410.16462</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16462">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Comparative Analysis of Human Mobility Patterns: Utilizing Taxi and Mobile (SafeGraph) Data to Investigate Neighborhood-Scale Mobility in New York City </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Jiang%2C+Y">Yuqin Jiang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+Z">Zhenlong Li</a>, <a href="/search/?searchtype=author&amp;query=Kim%2C+J">Joon-Seok Kim</a>, <a href="/search/?searchtype=author&amp;query=Ning%2C+H">Huan Ning</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S+Y">Su Yeon Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16462v1-abstract-short" style="display: inline;"> Numerous researchers have utilized GPS-enabled vehicle data and SafeGraph mobility data to analyze human movements. However, the comparison of their ability to capture human mobility remains unexplored. This study investigates differences in human mobility using taxi trip records and the SafeGraph dataset in New York City neighborhoods. The analysis includes neighborhood clustering to identify pop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16462v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16462v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16462v1-abstract-full" style="display: none;"> Numerous researchers have utilized GPS-enabled vehicle data and SafeGraph mobility data to analyze human movements. However, the comparison of their ability to capture human mobility remains unexplored. This study investigates differences in human mobility using taxi trip records and the SafeGraph dataset in New York City neighborhoods. The analysis includes neighborhood clustering to identify population characteristics and a comparative analysis of mobility patterns. 
Our findings show that taxi data tends to capture human mobility to and from locations such as Lower Manhattan, where taxi demand is consistently high, while often underestimating the volume of trips originating from areas with lower taxi demand, particularly in the suburbs of NYC. In contrast, SafeGraph data excels in capturing trips to and from areas where commuting by driving one&#39;s own car is common, but underestimates trips in pedestrian-heavy areas. The comparative analysis also sheds new light on transportation mode choices for trips across various neighborhoods. The results of this study underscore the importance of understanding the representativeness of human mobility big data and highlight the necessity for careful consideration when selecting the most suitable dataset for human mobility research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16462v1-abstract-full').style.display = 'none'; document.getElementById('2410.16462v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14894">arXiv:2410.14894</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14894">pdf</a>, <a href="https://arxiv.org/format/2410.14894">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Soft-Label Integration for Robust Toxicity Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cheng%2C+Z">Zelei Cheng</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+X">Xian Wu</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+J">Jiahao Yu</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shuo Han</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+X">Xin-Qiang Cai</a>, <a href="/search/?searchtype=author&amp;query=Xing%2C+X">Xinyu Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14894v2-abstract-short" style="display: inline;"> Toxicity classification in textual content remains a significant problem. Data with labels from a single annotator fall short of capturing the diversity of human perspectives. Therefore, there is a growing need to incorporate crowdsourced annotations for training an effective toxicity classifier. 
Additionally, the standard approach to training a classifier using empirical risk minimization (ERM) m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14894v2-abstract-full').style.display = 'inline'; document.getElementById('2410.14894v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14894v2-abstract-full" style="display: none;"> Toxicity classification in textual content remains a significant problem. Data with labels from a single annotator fall short of capturing the diversity of human perspectives. Therefore, there is a growing need to incorporate crowdsourced annotations for training an effective toxicity classifier. Additionally, the standard approach to training a classifier using empirical risk minimization (ERM) may fail to address the potential shifts between the training set and testing set due to exploiting spurious correlations. This work introduces a novel bi-level optimization framework that integrates crowdsourced annotations with the soft-labeling technique and optimizes the soft-label weights by Group Distributionally Robust Optimization (GroupDRO) to enhance the robustness against out-of-distribution (OOD) risk. We theoretically prove the convergence of our bi-level optimization algorithm. Experimental results demonstrate that our approach outperforms existing baseline methods in terms of both average and worst-group accuracy, confirming its effectiveness in leveraging crowdsourced annotations to achieve more effective and robust toxicity classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14894v2-abstract-full').style.display = 'none'; document.getElementById('2410.14894v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
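<p class="is-size-7">A minimal sketch of the Group DRO reweighting idea referenced in the toxicity-classification abstract above: per-group losses are averaged and the group weights are updated multiplicatively so that the worst-performing group dominates the objective; the bi-level soft-label optimization of the paper is not reproduced here, and the step size, group count, and names are illustrative assumptions.</p>
<pre>
import torch

def group_dro_loss(per_example_loss, group_ids, group_weights, eta=0.1):
    """One Group DRO step: average loss per group, exponentiated-gradient update of
    the group weights, then a weighted sum that emphasizes the worst group."""
    n_groups = group_weights.numel()
    group_losses = torch.stack([
        per_example_loss[group_ids == g].mean() if (group_ids == g).any()
        else per_example_loss.new_zeros(())
        for g in range(n_groups)
    ])
    with torch.no_grad():                              # the weights themselves are not learned by backprop
        group_weights *= torch.exp(eta * group_losses)
        group_weights /= group_weights.sum()
    return (group_weights * group_losses).sum(), group_weights

# Illustrative usage: 3 annotator-defined groups with random per-example losses.
weights = torch.ones(3) / 3
losses = torch.rand(32)
groups = torch.randint(0, 3, (32,))
robust_loss, weights = group_dro_loss(losses, groups, weights)
</pre>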
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38th Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13987">arXiv:2410.13987</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.13987">pdf</a>, <a href="https://arxiv.org/format/2410.13987">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RiTeK: A Dataset for Large Language Models Complex Reasoning over Textual Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Huang%2C+J">Jiatan Huang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+M">Mingchen Li</a>, <a href="/search/?searchtype=author&amp;query=Yao%2C+Z">Zonghai Yao</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+Z">Zhichao Yang</a>, <a href="/search/?searchtype=author&amp;query=Xiao%2C+Y">Yongkang Xiao</a>, <a href="/search/?searchtype=author&amp;query=Ouyang%2C+F">Feiyun Ouyang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+X">Xiaohan Li</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shuo Han</a>, <a href="/search/?searchtype=author&amp;query=Yu%2C+H">Hong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13987v1-abstract-short" style="display: inline;"> Answering complex real-world questions often requires accurate retrieval from textual knowledge graphs (TKGs). The scarcity of annotated data, along with intricate topological structures, makes this task particularly challenging. As the nature of relational path information could enhance the inference ability of Large Language Models (LLMs), efficiently retrieving more complex relational path info&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13987v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13987v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13987v1-abstract-full" style="display: none;"> Answering complex real-world questions often requires accurate retrieval from textual knowledge graphs (TKGs). The scarcity of annotated data, along with intricate topological structures, makes this task particularly challenging. As the nature of relational path information could enhance the inference ability of Large Language Models (LLMs), efficiently retrieving more complex relational path information from TKGs presents another key challenge. To tackle these challenges, we first develop a Dataset for LLMs Complex Reasoning over Textual Knowledge Graphs (RiTeK) with a broad topological structure coverage.We synthesize realistic user queries that integrate diverse topological structures, relational information, and complex textual descriptions. We conduct rigorous expert evaluation to validate the quality of our synthesized queries. 
We then introduce an enhanced Monte Carlo Tree Search (MCTS) method, Relational MCTS, to automatically extract relational path information from textual graphs for specific queries. Our dataset mainly covers the medical domain, as the relation types and entities are complex and publicly available. Experimental results indicate that RiTeK poses significant challenges for current retrieval and LLM systems, while the proposed Relational MCTS method enhances LLM inference ability and achieves state-of-the-art performance on RiTeK. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13987v1-abstract-full').style.display = 'none'; document.getElementById('2410.13987v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12875">arXiv:2410.12875</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12875">pdf</a>, <a href="https://arxiv.org/ps/2410.12875">ps</a>, <a href="https://arxiv.org/format/2410.12875">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> </div> </div> <p class="title is-5 mathjax"> The method of $a$-contraction with shifts used for long-time behavior toward viscous shock </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Han%2C+S">Sungho Han</a>, <a href="/search/?searchtype=author&amp;query=Kang%2C+M">Moon-Jin Kang</a>, <a href="/search/?searchtype=author&amp;query=Lee%2C+H">Hobin Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12875v1-abstract-short" style="display: inline;"> We revisit the method of $a$-contraction with shifts used for long-time behavior of barotropic Navier-Stokes flows perturbed from a Riemann shock. For the usage of the method of $a$-contraction with shifts, we do not employ the effective velocity $h$ variable even for higher order estimates. This approach would be important when handling the barotropic Navier-Stokes system with other effects, for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12875v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12875v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12875v1-abstract-full" style="display: none;"> We revisit the method of $a$-contraction with shifts used for long-time behavior of barotropic Navier-Stokes flows perturbed from a Riemann shock. For the usage of the method of $a$-contraction with shifts, we do not employ the effective velocity $h$ variable even for higher order estimates. This approach would be important when handling the barotropic Navier-Stokes system with other effects, for example, capillary effects and boundary effects. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12875v1-abstract-full').style.display = 'none'; document.getElementById('2410.12875v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is submitted to the proceedings of MSJ-KMS Joint Meeting 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12164">arXiv:2410.12164</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12164">pdf</a>, <a href="https://arxiv.org/format/2410.12164">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Table-LLM-Specialist: Language Model Specialists for Tables using Iterative Generator-Validator Fine-tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Xing%2C+J">Junjie Xing</a>, <a href="/search/?searchtype=author&amp;query=He%2C+Y">Yeye He</a>, <a href="/search/?searchtype=author&amp;query=Zhou%2C+M">Mengyu Zhou</a>, <a href="/search/?searchtype=author&amp;query=Dong%2C+H">Haoyu Dong</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shi Han</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+D">Dongmei Zhang</a>, <a href="/search/?searchtype=author&amp;query=Chaudhuri%2C+S">Surajit Chaudhuri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12164v1-abstract-short" style="display: inline;"> In this work, we propose Table-LLM-Specialist, or Table-Specialist for short, as a new self-trained fine-tuning paradigm specifically designed for table tasks. Our insight is that for each table task, there often exist two dual versions of the same task, one generative and one classification in nature. Leveraging their duality, we propose a Generator-Validator paradigm, to iteratively generate-the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12164v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12164v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12164v1-abstract-full" style="display: none;"> In this work, we propose Table-LLM-Specialist, or Table-Specialist for short, as a new self-trained fine-tuning paradigm specifically designed for table tasks. Our insight is that for each table task, there often exist two dual versions of the same task, one generative and one classification in nature. 
Leveraging their duality, we propose a Generator-Validator paradigm, to iteratively generate-then-validate training data from language-models, to fine-tune stronger \sys models that can specialize in a given task, without requiring manually-labeled data. Our extensive evaluations suggest that our Table-Specialist has (1) \textit{strong performance} on diverse table tasks over vanilla language-models -- for example, Table-Specialist fine-tuned on GPT-3.5 not only outperforms vanilla GPT-3.5, but can often match or surpass GPT-4 level quality, (2) \textit{lower cost} to deploy, because when Table-Specialist fine-tuned on GPT-3.5 achieve GPT-4 level quality, it becomes possible to deploy smaller models with lower latency and inference cost, with comparable quality, and (3) \textit{better generalizability} when evaluated across multiple benchmarks, since \sys is fine-tuned on a broad range of training data systematically generated from diverse real tables. Our code and data will be available at https://github.com/microsoft/Table-LLM-Specialist. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12164v1-abstract-full').style.display = 'none'; document.getElementById('2410.12164v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12160">arXiv:2410.12160</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12160">pdf</a>, <a href="https://arxiv.org/format/2410.12160">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> When to Trust Your Data: Enhancing Dyna-Style Model-Based Reinforcement Learning With Data Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Li%2C+Y">Yansong Li</a>, <a href="/search/?searchtype=author&amp;query=Dong%2C+Z">Zeyu Dong</a>, <a href="/search/?searchtype=author&amp;query=Luo%2C+E">Ertai Luo</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+Y">Yu Wu</a>, <a href="/search/?searchtype=author&amp;query=Wu%2C+S">Shuo Wu</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shuo Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12160v1-abstract-short" style="display: inline;"> Reinforcement learning (RL) algorithms can be divided into two classes: model-free algorithms, which are sample-inefficient, and model-based algorithms, which suffer from model bias. Dyna-style algorithms combine these two approaches by using simulated data from an estimated environmental model to accelerate model-free training. 
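A rough illustration of the generate-then-validate loop sketched in this abstract, on a toy column-typing task (the generator, validator, and data are made-up stand-ins, not the released Table-LLM-Specialist code):

# Toy sketch of a generate-then-validate round (assumed structure, illustrative only).
def generate(table):
    # stand-in for a generative LLM call: propose a type label for each column
    return [(col, "numeric" if all(isinstance(v, (int, float)) for v in vals) else "text")
            for col, vals in table.items()]

def validate(pair, table):
    # stand-in for the dual classification-style validator: re-check the proposed label
    col, label = pair
    is_num = all(isinstance(v, (int, float)) for v in table[col])
    return label == ("numeric" if is_num else "text")

def generator_validator_round(tables):
    training_data = []
    for table in tables:
        for pair in generate(table):
            if validate(pair, table):
                training_data.append((table, pair))  # kept pairs would fine-tune the specialist
    return training_data

tables = [{"age": [31, 45], "city": ["Oslo", "Lima"]}]
print(generator_validator_round(tables))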
arXiv:2410.12160 [pdf, other] cs.LG (Machine Learning), eess.SY (Systems and Control)
When to Trust Your Data: Enhancing Dyna-Style Model-Based Reinforcement Learning With Data Filter
Authors: Yansong Li, Zeyu Dong, Ertai Luo, Yu Wu, Shuo Wu, Shuo Han
Abstract: Reinforcement learning (RL) algorithms can be divided into two classes: model-free algorithms, which are sample-inefficient, and model-based algorithms, which suffer from model bias. Dyna-style algorithms combine these two approaches by using simulated data from an estimated environmental model to accelerate model-free training. However, their efficiency is compromised when the estimated model is inaccurate. Previous works address this issue by using model ensembles or pretraining the estimated model with data collected from the real environment, increasing computational and sample complexity. To tackle this issue, we introduce an out-of-distribution (OOD) data filter that removes simulated data from the estimated model that significantly diverges from data collected in the real environment. We show theoretically that this technique enhances the quality of simulated data. With the help of the OOD data filter, the data simulated from the estimated model better mimics the data collected by interacting with the real model. This improvement is evident in the critic updates compared to using the simulated data without the OOD data filter. Our experiment integrates the data filter into the model-based policy optimization (MBPO) algorithm. The results demonstrate that our method requires fewer interactions with the real environment to achieve a higher level of optimality than MBPO, even without a model ensemble.
Submitted 15 October, 2024; originally announced October 2024.
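A minimal sketch of the OOD filtering idea described above (the distance measure and threshold are assumptions; the abstract does not specify them):

import numpy as np

def ood_filter(simulated, real, threshold=1.0):
    """Keep simulated transitions whose nearest real transition is within `threshold`."""
    real = np.asarray(real)
    kept = []
    for s in simulated:
        dists = np.linalg.norm(real - np.asarray(s), axis=1)
        if dists.min() <= threshold:   # close enough to the real data distribution
            kept.append(s)
    return kept

real_transitions = [[0.0, 0.1], [0.2, 0.0]]        # features of transitions from the real env
simulated_transitions = [[0.1, 0.1], [5.0, 5.0]]   # from the learned model; the second is OOD
print(ood_filter(simulated_transitions, real_transitions))   # -> [[0.1, 0.1]]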
arXiv:2410.11718 [pdf, other] cs.CL (Computation and Language)
Converging to a Lingua Franca: Evolution of Linguistic Regions and Semantics Alignment in Multilingual Large Language Models
Authors: Hongchuan Zeng, Senyu Han, Lu Chen, Kai Yu
Abstract: Large language models (LLMs) have demonstrated remarkable performance, particularly in multilingual contexts. While recent studies suggest that LLMs can transfer skills learned in one language to others, the internal mechanisms behind this ability remain unclear. We observed that the neuron activation patterns of LLMs exhibit similarities when processing the same language, revealing the existence and location of key linguistic regions. Additionally, we found that neuron activation patterns are similar when processing sentences with the same semantic meaning in different languages. This indicates that LLMs map semantically identical inputs from different languages into a "Lingua Franca", a common semantic latent space that allows for consistent processing across languages. This semantic alignment becomes more pronounced with training and increased model size, resulting in a more language-agnostic activation pattern. Moreover, we found that key linguistic neurons are concentrated in the first and last layers of LLMs, becoming denser in the first layers as training progresses. Experiments on BLOOM and LLaMA2 support these findings, highlighting the structural evolution of multilingual LLMs during training and scaling up. This paper provides insights into the internal workings of LLMs, offering a foundation for future improvements in their cross-lingual capabilities.
Submitted 15 October, 2024; originally announced October 2024.
Comments: 16 pages, 11 figures, 4 tables

arXiv:2410.11207 [pdf] cs.LG (Machine Learning), physics.optics (Optics)
Cross-Dataset Generalization in Deep Learning
Authors: Xuyu Zhang, Haofan Huang, Dawei Zhang, Songlin Zhuang, Shensheng Han, Puxiang Lai, Honglin Liu
Abstract: Deep learning has been extensively used in various fields, such as phase imaging, 3D imaging reconstruction, phase unwrapping, and laser speckle reduction, particularly for complex problems that lack analytic models. Its data-driven nature allows for implicit construction of mathematical relationships within the network through training with abundant data. However, a critical challenge in practical applications is the generalization issue, where a network trained on one dataset struggles to recognize an unknown target from a different dataset. In this study, we investigate imaging through scattering media and discover that the mathematical relationship learned by the network is an approximation dependent on the training dataset, rather than the true mapping relationship of the model. We demonstrate that enhancing the diversity of the training dataset can improve this approximation, thereby achieving generalization across different datasets, as the mapping relationship of a linear physical model is independent of inputs. This study elucidates the nature of generalization across different datasets and provides insights into the design of training datasets to ultimately address the generalization issue in various deep learning-based applications.
Submitted 14 October, 2024; originally announced October 2024.

arXiv:2410.11119 [pdf, other] cs.CL (Computation and Language)
ChuLo: Chunk-Level Key Information Representation for Long Document Processing
Authors: Yan Li, Soyeon Caren Han, Yue Dai, Feiqi Cao
Abstract: Transformer-based models have achieved remarkable success in various Natural Language Processing (NLP) tasks, yet their ability to handle long documents is constrained by computational limitations. Traditional approaches, such as truncating inputs, sparse self-attention, and chunking, attempt to mitigate these issues, but they often lead to information loss and hinder the model's ability to capture long-range dependencies. In this paper, we introduce ChuLo, a novel chunk representation method for long document classification that addresses these limitations. ChuLo groups input tokens using unsupervised keyphrase extraction, emphasizing semantically important keyphrase-based chunks to retain core document content while reducing input length. This approach minimizes information loss and improves the efficiency of Transformer-based models. Preserving all tokens is especially important in long document understanding, particularly for token classification tasks, to ensure that fine-grained annotations, which depend on the entire sequence context, are not lost. We evaluate our method on multiple long document classification tasks and long document token classification tasks, demonstrating its effectiveness through comprehensive qualitative and quantitative analyses.
Submitted 11 November, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
Comments: The paper has been submitted to a conference and is currently under review
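A toy sketch of the keyphrase-weighted chunking idea described above (chunk size, the scoring rule, and the keyphrase set are placeholders, not ChuLo's actual extractor or representation):

def chunk_and_weight(tokens, keyphrases, chunk_size=8):
    chunks = [tokens[i:i + chunk_size] for i in range(0, len(tokens), chunk_size)]
    weighted = []
    for chunk in chunks:
        score = sum(tok in keyphrases for tok in chunk)   # emphasize keyphrase content
        weighted.append((chunk, score))
    return weighted

tokens = "transformer models struggle with very long documents unless chunks keep key information".split()
keyphrases = {"transformer", "long", "documents", "key", "information"}
for chunk, score in chunk_and_weight(tokens, keyphrases):
    print(score, chunk)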
arXiv:2410.10819 [pdf, other] cs.CL (Computation and Language)
DuoAttention: Efficient Long-Context LLM Inference with Retrieval and Streaming Heads
Authors: Guangxuan Xiao, Jiaming Tang, Jingwei Zuo, Junxian Guo, Shang Yang, Haotian Tang, Yao Fu, Song Han
Abstract: Deploying long-context large language models (LLMs) is essential but poses significant computational and memory challenges. Caching all Key and Value (KV) states across all attention heads consumes substantial memory. Existing KV cache pruning methods either damage the long-context capabilities of LLMs or offer only limited efficiency improvements. In this paper, we identify that only a fraction of attention heads, a.k.a. Retrieval Heads, are critical for processing long contexts and require full attention across all tokens. In contrast, all other heads, which primarily focus on recent tokens and attention sinks -- referred to as Streaming Heads -- do not require full attention. Based on this insight, we introduce DuoAttention, a framework that only applies a full KV cache to retrieval heads while using a lightweight, constant-length KV cache for streaming heads, which reduces both the LLM's decoding and pre-filling memory and latency without compromising its long-context abilities. DuoAttention uses a lightweight, optimization-based algorithm with synthetic data to identify retrieval heads accurately. Our method significantly reduces long-context inference memory by up to 2.55x for MHA and 1.67x for GQA models while speeding up decoding by up to 2.18x and 1.50x and accelerating pre-filling by up to 1.73x and 1.63x for MHA and GQA models, respectively, with minimal accuracy loss compared to full attention. Notably, combined with quantization, DuoAttention enables Llama-3-8B decoding with 3.3 million context length on a single A100 GPU. Code is provided at https://github.com/mit-han-lab/duo-attention.
Submitted 14 October, 2024; originally announced October 2024.
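The two cache policies described in the abstract can be pictured roughly as follows (head classification, sink size, and window size are illustrative assumptions, not the DuoAttention implementation):

def prune_kv(keys, values, is_retrieval_head, sink=4, window=64):
    if is_retrieval_head:
        return keys, values                      # retrieval heads keep the full KV cache
    kept_k = keys[:sink] + keys[-window:]        # streaming heads keep sink tokens + recent tokens
    kept_v = values[:sink] + values[-window:]
    return kept_k, kept_v

keys = list(range(1000)); values = list(range(1000))
print(len(prune_kv(keys, values, is_retrieval_head=False)[0]))   # 68: constant-length cache
print(len(prune_kv(keys, values, is_retrieval_head=True)[0]))    # 1000: full cache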
arXiv:2410.10812 [pdf, other] cs.CV (Computer Vision and Pattern Recognition), cs.AI (Artificial Intelligence), cs.LG (Machine Learning)
HART: Efficient Visual Generation with Hybrid Autoregressive Transformer
Authors: Haotian Tang, Yecheng Wu, Shang Yang, Enze Xie, Junsong Chen, Junyu Chen, Zhuoyang Zhang, Han Cai, Yao Lu, Song Han
Abstract: We introduce Hybrid Autoregressive Transformer (HART), an autoregressive (AR) visual generation model capable of directly generating 1024x1024 images, rivaling diffusion models in image generation quality. Existing AR models face limitations due to the poor image reconstruction quality of their discrete tokenizers and the prohibitive training costs associated with generating 1024px images. To address these challenges, we present the hybrid tokenizer, which decomposes the continuous latents from the autoencoder into two components: discrete tokens representing the big picture and continuous tokens representing the residual components that cannot be represented by the discrete tokens. The discrete component is modeled by a scalable-resolution discrete AR model, while the continuous component is learned with a lightweight residual diffusion module with only 37M parameters. Compared with the discrete-only VAR tokenizer, our hybrid approach improves reconstruction FID from 2.11 to 0.30 on MJHQ-30K, leading to a 31% generation FID improvement from 7.85 to 5.38. HART also outperforms state-of-the-art diffusion models in both FID and CLIP score, with 4.5-7.7x higher throughput and 6.9-13.4x lower MACs. Our code is open sourced at https://github.com/mit-han-lab/hart.
Submitted 14 October, 2024; originally announced October 2024.
Comments: Demo: https://hart.mit.edu. The first two authors contributed equally to this work
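A toy sketch of the hybrid decomposition described above: a continuous latent is split into a quantized (discrete) part plus a continuous residual. The rounding quantizer is a stand-in for a learned codebook and is purely illustrative:

import numpy as np

def hybrid_tokenize(latent):
    discrete = np.round(latent)          # "big picture" discrete tokens
    residual = latent - discrete         # continuous residual left for a small diffusion module
    return discrete, residual

latent = np.array([0.37, -1.62, 2.05])
d, r = hybrid_tokenize(latent)
print(d, r, np.allclose(d + r, latent))  # the two parts reconstruct the latent exactly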
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10758v3-abstract-full').style.display = 'none'; document.getElementById('2410.10758v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for BIBM 2024 AIBH Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10733">arXiv:2410.10733</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.10733">pdf</a>, <a href="https://arxiv.org/format/2410.10733">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Deep Compression Autoencoder for Efficient High-Resolution Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Junyu Chen</a>, <a href="/search/?searchtype=author&amp;query=Cai%2C+H">Han Cai</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+J">Junsong Chen</a>, <a href="/search/?searchtype=author&amp;query=Xie%2C+E">Enze Xie</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+S">Shang Yang</a>, <a href="/search/?searchtype=author&amp;query=Tang%2C+H">Haotian Tang</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+M">Muyang Li</a>, <a href="/search/?searchtype=author&amp;query=Lu%2C+Y">Yao Lu</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Song Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10733v2-abstract-short" style="display: inline;"> We present Deep Compression Autoencoder (DC-AE), a new family of autoencoder models for accelerating high-resolution diffusion models. Existing autoencoder models have demonstrated impressive results at a moderate spatial compression ratio (e.g., 8x), but fail to maintain satisfactory reconstruction accuracy for high spatial compression ratios (e.g., 64x). We address this challenge by introducing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10733v2-abstract-full').style.display = 'inline'; document.getElementById('2410.10733v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10733v2-abstract-full" style="display: none;"> We present Deep Compression Autoencoder (DC-AE), a new family of autoencoder models for accelerating high-resolution diffusion models. Existing autoencoder models have demonstrated impressive results at a moderate spatial compression ratio (e.g., 8x), but fail to maintain satisfactory reconstruction accuracy for high spatial compression ratios (e.g., 64x). 
arXiv:2410.10733 [pdf, other] cs.CV (Computer Vision and Pattern Recognition), cs.AI (Artificial Intelligence)
Deep Compression Autoencoder for Efficient High-Resolution Diffusion Models
Authors: Junyu Chen, Han Cai, Junsong Chen, Enze Xie, Shang Yang, Haotian Tang, Muyang Li, Yao Lu, Song Han
Abstract: We present Deep Compression Autoencoder (DC-AE), a new family of autoencoder models for accelerating high-resolution diffusion models. Existing autoencoder models have demonstrated impressive results at a moderate spatial compression ratio (e.g., 8x), but fail to maintain satisfactory reconstruction accuracy for high spatial compression ratios (e.g., 64x). We address this challenge by introducing two key techniques: (1) Residual Autoencoding, where we design our models to learn residuals based on the space-to-channel transformed features to alleviate the optimization difficulty of high spatial-compression autoencoders; (2) Decoupled High-Resolution Adaptation, an efficient decoupled three-phase training strategy for mitigating the generalization penalty of high spatial-compression autoencoders. With these designs, we improve the autoencoder's spatial compression ratio up to 128 while maintaining the reconstruction quality. Applying our DC-AE to latent diffusion models, we achieve significant speedup without accuracy drop. For example, on ImageNet 512x512, our DC-AE provides 19.1x inference speedup and 17.9x training speedup on an H100 GPU for UViT-H while achieving a better FID, compared with the widely used SD-VAE-f8 autoencoder. Our code is available at https://github.com/mit-han-lab/efficientvit.
Submitted 11 November, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
Comments: Preprint. First two authors contributed equally to this work. Update: add diffusion model scaling results
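For orientation, the space-to-channel transform referenced in technique (1) folds an HxWxC feature map into (H/r)x(W/r)x(C*r*r); the factor and shapes below are illustrative assumptions, not DC-AE's configuration:

import numpy as np

def space_to_channel(x, r=2):
    h, w, c = x.shape
    return x.reshape(h // r, r, w // r, r, c).transpose(0, 2, 1, 3, 4).reshape(h // r, w // r, c * r * r)

x = np.arange(4 * 4 * 3, dtype=float).reshape(4, 4, 3)
print(space_to_channel(x).shape)   # (2, 2, 12): spatial size shrinks, channels grow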
arXiv:2410.10629 [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
SANA: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformers
Authors: Enze Xie, Junsong Chen, Junyu Chen, Han Cai, Haotian Tang, Yujun Lin, Zhekai Zhang, Muyang Li, Ligeng Zhu, Yao Lu, Song Han
Abstract: We introduce Sana, a text-to-image framework that can efficiently generate images up to 4096$\times$4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, and is deployable on a laptop GPU. Core designs include: (1) Deep compression autoencoder: unlike traditional AEs, which compress images only 8$\times$, we trained an AE that can compress images 32$\times$, effectively reducing the number of latent tokens. (2) Linear DiT: we replace all vanilla attention in DiT with linear attention, which is more efficient at high resolutions without sacrificing quality. (3) Decoder-only text encoder: we replaced T5 with a modern decoder-only small LLM as the text encoder and designed complex human instruction with in-context learning to enhance the image-text alignment. (4) Efficient training and sampling: we propose Flow-DPM-Solver to reduce sampling steps, with efficient caption labeling and selection to accelerate convergence. As a result, Sana-0.6B is very competitive with modern giant diffusion models (e.g., Flux-12B), being 20 times smaller and 100+ times faster in measured throughput. Moreover, Sana-0.6B can be deployed on a 16GB laptop GPU, taking less than 1 second to generate a 1024$\times$1024 resolution image. Sana enables content creation at low cost. Code and model will be publicly released.
Submitted 20 October, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
Comments: Technical Report
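A minimal sketch of the linear-attention idea referenced in design (2): computing phi(Q) (phi(K)^T V) avoids forming the N x N attention matrix of vanilla attention. The ReLU feature map and shapes are illustrative assumptions, not SANA's exact kernel:

import numpy as np

def linear_attention(q, k, v, eps=1e-6):
    phi_q, phi_k = np.maximum(q, 0), np.maximum(k, 0)        # simple positive feature map
    kv = phi_k.T @ v                                         # (d x d_v), independent of sequence length
    norm = phi_q @ phi_k.sum(axis=0, keepdims=True).T + eps  # per-query normalizer
    return (phi_q @ kv) / norm

rng = np.random.default_rng(0)
q, k, v = (rng.normal(size=(1024, 32)) for _ in range(3))
print(linear_attention(q, k, v).shape)   # (1024, 32)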
arXiv:2410.10139 [pdf, other] cs.CV (Computer Vision and Pattern Recognition), cs.CL (Computation and Language), cs.LG (Machine Learning)
MMIE: Massive Multimodal Interleaved Comprehension Benchmark for Large Vision-Language Models
Authors: Peng Xia, Siwei Han, Shi Qiu, Yiyang Zhou, Zhaoyang Wang, Wenhao Zheng, Zhaorun Chen, Chenhang Cui, Mingyu Ding, Linjie Li, Lijuan Wang, Huaxiu Yao
Abstract: Interleaved multimodal comprehension and generation, enabling models to produce and interpret both images and text in arbitrary sequences, has become a pivotal area in multimodal learning. Despite significant advancements, the evaluation of this capability remains insufficient. Existing benchmarks suffer from limitations in data scale, scope, and evaluation depth, while current evaluation metrics are often costly or biased, lacking reliability for practical applications. To address these challenges, we introduce MMIE, a large-scale knowledge-intensive benchmark for evaluating interleaved multimodal comprehension and generation in Large Vision-Language Models (LVLMs). MMIE comprises 20K meticulously curated multimodal queries, spanning 3 categories, 12 fields, and 102 subfields, including mathematics, coding, physics, literature, health, and arts. It supports both interleaved inputs and outputs, offering a mix of multiple-choice and open-ended question formats to evaluate diverse competencies. Moreover, we propose a reliable automated evaluation metric, leveraging a scoring model fine-tuned with human-annotated data and systematic evaluation criteria, aimed at reducing bias and improving evaluation accuracy. Extensive experiments demonstrate the effectiveness of our benchmark and metrics in providing a comprehensive evaluation of interleaved LVLMs. Specifically, we evaluate eight LVLMs, revealing that even the best models show significant room for improvement, with most achieving only moderate results. We believe MMIE will drive further advancements in the development of interleaved LVLMs. We publicly release our benchmark and code at https://mmie-bench.github.io/.
Submitted 14 October, 2024; originally announced October 2024.

arXiv:2410.10077 [pdf, other] cond-mat.stat-mech (Statistical Mechanics), math.PR (Probability)
A physical random walk for space-fractional diffusion on finite domains
Authors: Christopher N. Angstmann, Daniel S. Han, Bruce I. Henry, Boris Z. Huang, Zhuang Xu
Abstract: We formulate a compounded random walk that is physically well defined on both finite and infinite domains, and samples space-dependent forces throughout jumps. The governing evolution equation for the walk limits to a space-fractional Fokker-Planck equation valid on bounded domains, and recovers the well-known superdiffusive space-fractional diffusion equation on infinite domains. This compounded random walk, and its associated fractional Fokker-Planck equation, provides a major advance for modelling space-fractional diffusion through potential fields and on finite domains.
Submitted 13 October, 2024; originally announced October 2024.
Comments: 6 pages, 3 figures
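For orientation, the "well-known superdiffusive space-fractional diffusion equation" mentioned above is usually written with a Riesz fractional derivative of order $\alpha$; this is the standard textbook form, quoted here for context rather than taken from the paper: $\frac{\partial p(x,t)}{\partial t} = D_\alpha \, \frac{\partial^{\alpha} p(x,t)}{\partial |x|^{\alpha}}$ with $1 < \alpha < 2$.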
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09207v1-abstract-short" style="display: inline;"> Existing methods on understanding the capabilities of LLMs in logical reasoning rely on binary entailment classification or synthetically derived rationales, which are not sufficient for proper investigation of model&#39;s capabilities. We present P-FOLIO, a human-annotated dataset consisting of diverse and complex reasoning chains for a set of realistic logical reasoning stories also written by human&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09207v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09207v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09207v1-abstract-full" style="display: none;"> Existing methods on understanding the capabilities of LLMs in logical reasoning rely on binary entailment classification or synthetically derived rationales, which are not sufficient for proper investigation of model&#39;s capabilities. We present P-FOLIO, a human-annotated dataset consisting of diverse and complex reasoning chains for a set of realistic logical reasoning stories also written by humans. P-FOLIO is collected with an annotation protocol that facilitates humans to annotate well-structured natural language proofs for first-order logic reasoning problems in a step-by-step manner. The number of reasoning steps in P-FOLIO span from 0 to 20. We further use P-FOLIO to evaluate and improve large-language-model (LLM) reasoning capabilities. We evaluate LLM reasoning capabilities at a fine granularity via single-step inference rule classification, with more diverse inference rules of more diverse and higher levels of complexities than previous works. Given that a single model-generated reasoning chain could take a completely different path than the human-annotated one, we sample multiple reasoning chains from a model and use pass@k metrics for evaluating the quality of model-generated reasoning chains. We show that human-written reasoning chains significantly boost the logical reasoning capabilities of LLMs via many-shot prompting and fine-tuning. Furthermore, fine-tuning Llama3-7B on P-FOLIO improves the model performance by 10% or more on three other out-of-domain logical reasoning datasets. We also conduct detailed analysis to show where most powerful LLMs fall short in reasoning. We will release the dataset and code publicly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09207v1-abstract-full').style.display = 'none'; document.getElementById('2410.09207v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
arXiv:2410.06767 [pdf, ps, other] cs.IT (Information Theory), eess.SP (Signal Processing)
On the Performance of Pilot-Aided Simultaneous Communication and Tracking
Authors: Shuaishuai Han, Emad Alsusa, Mohammad Ahmad Al-Jarrah, Mahmoud AlaaEldin
Abstract: In this paper, a symbol error rate performance analysis is provided for a pilot-aided simultaneous communication and tracking (PASCAT) system. Specifically, we employ multiple drones to actively transmit signals to a BS, which is responsible for continuously monitoring the location of the drones over time and decoding the symbols transmitted from the drones. It is found that the estimated location parameters at a given moment during tracking follow Gaussian distributions with means equal to the actual values and variances equal to the root mean square error (RMSE). Afterwards, the obtained location information is used to inform the channel information, which is then used to preprocess the received signal before decoding by means of the maximum ratio combining (MRC) technique. The average symbol error rate (SER) is also evaluated over the distribution of the estimated location parameters, and an approximate value for the average SER is obtained by using a Taylor approximation with fast convergence. The result indicates that there is a close relationship between the RMSE of the estimated location parameters and the average SER. In addition, the effect of the number of pilot signals is analysed as well. By employing more pilots, it is found that both communication and sensing functionalities are enhanced. Furthermore, the SER performance of our PASCAT system is similar to that of maximum likelihood detection (MLD) when a number of pilot signals are employed, which demonstrates the efficiency of the PASCAT system. In the end, all results are validated using Monte Carlo simulations.
Submitted 9 October, 2024; originally announced October 2024.
Comments: 13 pages, 9 figures
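A minimal sketch of maximum ratio combining (MRC) as mentioned in the abstract, in its generic textbook form: weight each receive branch by the conjugate of its channel estimate and normalize. The channel values and noise below are made up; the PASCAT-specific preprocessing is not reproduced here:

import numpy as np

def mrc_combine(received, channel_estimates):
    received = np.asarray(received)            # one complex sample per antenna/branch
    h = np.asarray(channel_estimates)
    return np.vdot(h, received) / np.sum(np.abs(h) ** 2)

h = np.array([0.8 + 0.2j, 0.3 - 0.5j])
symbol = 1 + 1j
noise = np.array([0.05 - 0.02j, -0.03 + 0.04j])
y = h * symbol + noise
print(mrc_combine(y, h))                       # close to the transmitted symbol 1+1j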
These attributes are crucial for exploring collective quantum phenomena in highly ordered atomic arrays coupled to optical waveguides for quantum networking and quantum simulation. In this paper, we demonstrate adjustable-period lattices that are ideally suited for use with optical nanofibers.

Submitted 8 October, 2024; originally announced October 2024.
Comments: 7 pages, 4 figures

arXiv:2410.05608 (https://arxiv.org/abs/2410.05608)  [pdf, ps, other]  cs.CL (Computation and Language)

Multimodal Large Language Models and Tunings: Vision, Language, Sensors, Audio, and Beyond

Authors: Soyeon Caren Han, Feiqi Cao, Josiah Poon, Roberto Navigli

Abstract: This tutorial explores recent advancements in multimodal pretrained and large models, capable of integrating and processing diverse data forms such as text, images, audio, and video. Participants will gain an understanding of the foundational concepts of multimodality, the evolution of multimodal research, and the key technical challenges addressed by these models. We will cover the latest multimodal datasets and pretrained models, including those beyond vision and language.
Additionally, the tutorial will delve into the intricacies of multimodal large models and instruction tuning strategies to optimise performance for specific tasks. Hands-on laboratories will offer practical experience with state-of-the-art multimodal models, demonstrating real-world applications like visual storytelling and visual question answering. This tutorial aims to equip researchers, practitioners, and newcomers with the knowledge and skills to leverage multimodal AI. ACM Multimedia 2024 is the ideal venue for this tutorial, aligning perfectly with our goal of understanding multimodal pretrained and large language models, and their tuning mechanisms.

Submitted 7 October, 2024; originally announced October 2024.
Comments: Accepted at ACM-MM 2024

arXiv:2410.05449 (https://arxiv.org/abs/2410.05449)  [pdf]  cs.HC (Human-Computer Interaction)

Skin Controlled Electronic and Neuromorphic Tattoos

Authors: Dmitry Kireev, Nandu Koripally, Samuel Liu, Gabriella Coloyan Fleming, Philip Varkey, Joseph Belle, Sivasakthya Mohan, Sang Sub Han, Dong Xu, Yeonwoong Jung, Xiangfeng Duan, Jean Anne C. Incorvia, Deji Akinwande
Abstract: Wearable human activity sensors developed in the past decade show a distinct trend of becoming thinner and more imperceptible while retaining their electrical qualities, with graphene e-tattoos as the ultimate example. A persistent challenge in modern wearables, however, is signal degradation due to the distance between the sensor's recording site and the signal transmission medium. To address this, we propose here to directly utilize human skin as a signal transmission medium as well as using low-cost gel electrodes for rapid probing of 2D transistor-based wearables. We demonstrate that the hypodermis layer of the skin can effectively serve as an electrolyte, enabling electrical potential application to semiconducting films made from graphene and other 2D materials placed on top of the skin. Graphene transistor tattoos, when biased through the body, exhibit high charge carrier mobility (up to 6500 cm2V-1s-1), with MoS2 and PtSe2 transistors showing mobilities up to 30 cm2V-1s-1 and 1 cm2V-1s-1, respectively. Finally, by introducing a layer of Nafion to the device structure, we observed neuromorphic functionality, transforming these e-tattoos into neuromorphic bioelectronic devices controlled through the skin itself. The neuromorphic bioelectronic tattoos have the potential for developing self-aware and stand-alone smart wearables, crucial for understanding and improving overall human performance.

Submitted 7 October, 2024; originally announced October 2024.
arXiv:2410.04265 (https://arxiv.org/abs/2410.04265)  [pdf, other]  cs.CL (Computation and Language)

AI as Humanity's Salieri: Quantifying Linguistic Creativity of Language Models via Systematic Attribution of Machine Text against Web Text

Authors: Ximing Lu, Melanie Sclar, Skyler Hallinan, Niloofar Mireshghallah, Jiacheng Liu, Seungju Han, Allyson Ettinger, Liwei Jiang, Khyathi Chandu, Nouha Dziri, Yejin Choi

Abstract: Creativity has long been considered one of the most difficult aspects of human intelligence for AI to mimic. However, the rise of Large Language Models (LLMs), like ChatGPT, has raised questions about whether AI can match or even surpass human creativity. We present CREATIVITY INDEX as the first step to quantify the linguistic creativity of a text by reconstructing it from existing text snippets on the web. CREATIVITY INDEX is motivated by the hypothesis that the seemingly remarkable creativity of LLMs may be attributable in large part to the creativity of human-written texts on the web. To compute CREATIVITY INDEX efficiently, we introduce DJ SEARCH, a novel dynamic programming algorithm that can search verbatim and near-verbatim matches of text snippets from a given document against the web. Experiments reveal that the CREATIVITY INDEX of professional human authors is on average 66.2% higher than that of LLMs, and that alignment reduces the CREATIVITY INDEX of LLMs by an average of 30.1%. In addition, we find that distinguished authors like Hemingway exhibit measurably higher CREATIVITY INDEX compared to other human writers. Finally, we demonstrate that CREATIVITY INDEX can be used as a surprisingly effective criterion for zero-shot machine text detection, surpassing the strongest existing zero-shot system, DetectGPT, by a significant margin of 30.2%, and even outperforming the strongest supervised system, GhostBuster, in five out of six domains.

Submitted 5 October, 2024; originally announced October 2024.
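DJ SEARCH itself is only summarized above; the underlying idea of scoring a document by how much of it can be covered by verbatim snippet matches against a reference corpus can be sketched as below. The greedy matching, the minimum match length, and the tiny in-memory "corpus" are illustrative assumptions, not the paper's exact CREATIVITY INDEX definition or its web-scale index.

```python
def verbatim_coverage(document: str, corpus: list[str], min_len: int = 5) -> float:
    """Fraction of document tokens covered by verbatim n-gram matches
    (length >= min_len tokens) found anywhere in the reference corpus."""
    doc = document.split()
    corpus_text = " ".join(corpus)
    covered = [False] * len(doc)
    for i in range(len(doc)):
        # Greedily extend the longest verbatim match starting at token i.
        j = i + min_len
        while j <= len(doc) and " ".join(doc[i:j]) in corpus_text:
            j += 1
        if j - 1 - i >= min_len:            # a qualifying match was found
            for k in range(i, j - 1):
                covered[k] = True
    return sum(covered) / max(len(doc), 1)

snippets = ["the quick brown fox jumps over the lazy dog"]
text = "a quick brown fox jumps over the lazy dog near the river"
print(verbatim_coverage(text, snippets))    # higher coverage => less "creative"
```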
arXiv:2410.04164 (https://arxiv.org/abs/2410.04164)  [pdf, other]  cs.CL (Computation and Language)

Towards Effective Counter-Responses: Aligning Human Preferences with Strategies to Combat Online Trolling

Authors: Huije Lee, Hoyun Song, Jisu Shin, Sukmin Cho, SeungYoon Han, Jong C. Park

Abstract: Trolling in online communities typically involves disruptive behaviors such as provoking anger and manipulating discussions, leading to a polarized atmosphere and emotional distress. Robust moderation is essential for mitigating these negative impacts and maintaining a healthy and constructive community atmosphere. However, effectively addressing trolls is difficult because their behaviors vary widely and require different response strategies (RSs) to counter them.
This diversity makes it challenging to choose an appropriate RS for each specific situation. To address this challenge, our research investigates whether humans have preferred strategies tailored to different types of trolling behaviors. Our findings reveal a correlation between the types of trolling encountered and the preferred RS. In this paper, we introduce a methodology for generating counter-responses to trolls by recommending appropriate RSs, supported by a dataset aligning these strategies with human preferences across various troll contexts. The experimental results demonstrate that our proposed approach guides constructive discussion and reduces the negative effects of trolls, thereby enhancing the online community environment.

Submitted 5 October, 2024; originally announced October 2024.
Comments: Findings of EMNLP 2024

arXiv:2410.03858 (https://arxiv.org/abs/2410.03858)  [pdf, other]  cs.CV (Computer Vision and Pattern Recognition)

Unsupervised Prior Learning: Discovering Categorical Pose Priors from Videos

Authors: Ziyu Wang, Shuangpeng Han, Mike Zheng Shou, Mengmi Zhang

Abstract: A prior represents a set of beliefs or assumptions about a system, aiding inference and decision-making.
In this work, we introduce the challenge of unsupervised prior learning in pose estimation, where AI models learn pose priors of animate objects from videos in a self-supervised manner. These videos present objects performing various actions, providing crucial information about their keypoints and connectivity. While priors are effective in pose estimation, acquiring them can be difficult. We propose a novel method, named Pose Prior Learner (PPL), to learn general pose priors applicable to any object category. PPL uses a hierarchical memory to store compositional parts of prototypical poses, from which we distill a general pose prior. This prior enhances pose estimation accuracy through template transformation and image reconstruction. PPL learns meaningful pose priors without any additional human annotations or interventions, outperforming competitive baselines on both human and animal pose estimation datasets. Notably, our experimental results reveal the effectiveness of PPL using learnt priors for pose estimation on occluded images. Through iterative inference, PPL leverages priors to refine estimated poses, regressing them to any prototypical poses stored in memory. Our code, model, and data will be publicly available.

Submitted 4 October, 2024; originally announced October 2024.

arXiv:2410.03009 (https://arxiv.org/abs/2410.03009)  [pdf, other]  cond-mat.supr-con (Superconductivity); cond-mat.mes-hall (Mesoscale and Nanoscale Physics)

Implementing Josephson Junction spectroscopy in a scanning tunneling microscope

Authors: Margaret A. Fortman, David C. Harrison, Ramiro H. Rodriguez, Zachary J. Krebs, Sangjun Han, Min Seok Jang, Robert McDermott, Caglar O. Girit, Victor W. Brar
Abstract: Josephson junction spectroscopy is a powerful local microwave spectroscopy technique that has promising potential as a diagnostic tool to probe the microscopic origins of noise in superconducting qubits. We present advancements toward realizing Josephson junction spectroscopy in a scanning geometry, where the Josephson junction is formed between a superconducting sample and a high capacitance superconducting STM tip. Data from planar Nb-based Josephson junction devices first demonstrate the benefits of including a high capacitance shunt across the junction, which decreases linewidth and improves performance at elevated temperatures. We show how an equivalent circuit can be implemented by utilizing a planarized STM tip with local prominences, which are fabricated via electron beam lithography and reactive ion etching, followed by coating with a superconducting layer. Differential conductance measurements on a superconducting NbN surface demonstrate the ability of these high capacitance tips to decrease both thermal noise and P(E)-broadening in comparison to typical wire tips.

Submitted 3 October, 2024; originally announced October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02384">arXiv:2410.02384</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02384">pdf</a>, <a href="https://arxiv.org/format/2410.02384">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Unveiling AI&#39;s Blind Spots: An Oracle for In-Domain, Out-of-Domain, and Adversarial Errors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Han%2C+S">Shuangpeng Han</a>, <a href="/search/?searchtype=author&amp;query=Zhang%2C+M">Mengmi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02384v1-abstract-short" style="display: inline;"> AI models make mistakes when recognizing images-whether in-domain, out-of-domain, or adversarial. Predicting these errors is critical for improving system reliability, reducing costly mistakes, and enabling proactive corrections in real-world applications such as healthcare, finance, and autonomous systems. However, understanding what mistakes AI models make, why they occur, and how to predict the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02384v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02384v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02384v1-abstract-full" style="display: none;"> AI models make mistakes when recognizing images-whether in-domain, out-of-domain, or adversarial. Predicting these errors is critical for improving system reliability, reducing costly mistakes, and enabling proactive corrections in real-world applications such as healthcare, finance, and autonomous systems. However, understanding what mistakes AI models make, why they occur, and how to predict them remains an open challenge. Here, we conduct comprehensive empirical evaluations using a &#34;mentor&#34; model-a deep neural network designed to predict another model&#39;s errors. Our findings show that the mentor model excels at learning from a mentee&#39;s mistakes on adversarial images with small perturbations and generalizes effectively to predict in-domain and out-of-domain errors of the mentee. Additionally, transformer-based mentor models excel at predicting errors across various mentee architectures. Subsequently, we draw insights from these observations and develop an &#34;oracle&#34; mentor model, dubbed SuperMentor, that achieves 78% accuracy in predicting errors across different error types. Our error prediction framework paves the way for future research on anticipating and correcting AI model behaviours, ultimately increasing trust in AI systems. All code, models, and data will be made publicly available. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02384v1-abstract-full').style.display = 'none'; document.getElementById('2410.02384v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02203">arXiv:2410.02203</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02203">pdf</a>, <a href="https://arxiv.org/format/2410.02203">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GraphIC: A Graph-Based In-Context Example Retrieval Model for Multi-Step Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Fu%2C+J">Jiale Fu</a>, <a href="/search/?searchtype=author&amp;query=Wang%2C+Y">Yaqing Wang</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Simeng Han</a>, <a href="/search/?searchtype=author&amp;query=Fan%2C+J">Jiaming Fan</a>, <a href="/search/?searchtype=author&amp;query=Si%2C+C">Chen Si</a>, <a href="/search/?searchtype=author&amp;query=Yang%2C+X">Xu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02203v1-abstract-short" style="display: inline;"> In-context learning (ICL) enables large language models (LLMs) to generalize to new tasks by incorporating a few in-context examples (ICEs) directly in the input, without updating parameters. However, the effectiveness of ICL heavily relies on the selection of ICEs, and conventional text-based embedding methods are often inadequate for tasks that require multi-step reasoning, such as mathematical&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02203v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02203v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02203v1-abstract-full" style="display: none;"> In-context learning (ICL) enables large language models (LLMs) to generalize to new tasks by incorporating a few in-context examples (ICEs) directly in the input, without updating parameters. However, the effectiveness of ICL heavily relies on the selection of ICEs, and conventional text-based embedding methods are often inadequate for tasks that require multi-step reasoning, such as mathematical and logical problem solving. This is due to the bias introduced by shallow semantic similarities that fail to capture the deeper reasoning structures required for these tasks. We present GraphIC, a novel approach that leverages graph-based representations of reasoning processes, coupled with Bayesian Networks (BNs) to select ICEs. Graph structures inherently filter out shallow semantics while preserving the core reasoning structure. Importantly, BNs capture the dependency of a node&#39;s attributes on its parent nodes, closely mirroring the hierarchical nature of human cognition-where each thought is shaped by preceding ones. 
This makes BNs particularly well-suited for multi-step reasoning tasks, aligning the process more closely with human-like reasoning. Extensive experiments across three types of reasoning tasks (mathematical reasoning, code generation, and logical reasoning) demonstrate that GraphIC outperforms both training-free and training-based models in selecting ICEs, excelling in terms of both effectiveness and efficiency. We show that GraphIC enhances ICL's performance and interoperability, significantly advancing ICE selection for multi-step reasoning tasks.

Submitted 3 October, 2024; originally announced October 2024.

arXiv:2410.01696 (https://arxiv.org/abs/2410.01696)  [pdf, other]  cs.AI (Artificial Intelligence); cs.CL (Computation and Language)

CreDes: Causal Reasoning Enhancement and Dual-End Searching for Solving Long-Range Reasoning Problems using LLMs

Authors: Kangsheng Wang, Xiao Zhang, Hao Liu, Songde Han, Huimin Ma, Tianyu Hu

Abstract: Large language models (LLMs) have demonstrated limitations in handling combinatorial optimization problems involving long-range reasoning, partially due to causal hallucinations and huge search space. As for causal hallucinations, i.e., the inconsistency between reasoning and the corresponding state transition, this paper introduces the Causal Relationship Enhancement (CRE) mechanism, combining cause-effect interventions and the Individual Treatment Effect (ITE), to guarantee the solid causal rightness between each step of reasoning and state transition. As for the long causal range and huge search space limiting the performance of existing models featuring single-direction search, a Dual-End Searching (DES) approach is proposed to seek solutions by simultaneously starting from both the initial and goal states on the causal probability tree. By integrating CRE and DES (CreDes), our model has realized simultaneous multi-step reasoning, circumventing the inefficiencies of cascading multiple one-step reasoning steps as in Chain-of-Thought (CoT). Experiments demonstrate that CreDes significantly outperforms existing state-of-the-art (SOTA) solutions on long-range reasoning tasks in terms of both accuracy and time efficiency.

Submitted 2 October, 2024; originally announced October 2024.
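Dual-End Searching, as summarized above, expands from the initial and the goal state at the same time; the generic bidirectional search sketch below shows that pattern on a plain adjacency-list graph. The toy undirected graph and the breadth-first expansion are assumptions; the paper's DES operates on an LLM-built causal probability tree, which is not reproduced here.

```python
from collections import deque

def bidirectional_search(graph, start, goal):
    """Expand breadth-first frontiers from both ends until they meet.
    `graph` maps a state to an iterable of neighbouring states (undirected here)."""
    if start == goal:
        return [start]
    parents_f, parents_b = {start: None}, {goal: None}
    frontier_f, frontier_b = deque([start]), deque([goal])

    def expand(frontier, parents, other_parents):
        node = frontier.popleft()
        for nxt in graph.get(node, ()):
            if nxt not in parents:
                parents[nxt] = node
                frontier.append(nxt)
                if nxt in other_parents:      # the two searches have met
                    return nxt
        return None

    while frontier_f and frontier_b:
        meet = expand(frontier_f, parents_f, parents_b)
        if meet is None:
            meet = expand(frontier_b, parents_b, parents_f)
        if meet is None:
            continue
        # Stitch the two half-paths together at the meeting state.
        path_f, n = [], meet
        while n is not None:
            path_f.append(n)
            n = parents_f[n]
        path_b, n = [], parents_b[meet]
        while n is not None:
            path_b.append(n)
            n = parents_b[n]
        return path_f[::-1] + path_b
    return None

# Toy undirected state graph (assumption): start "s", goal "g".
g = {"s": ["a", "b"], "a": ["s", "c"], "b": ["s", "c"],
     "c": ["a", "b", "g"], "g": ["c"]}
print(bidirectional_search(g, "s", "g"))    # e.g. ['s', 'a', 'c', 'g']
```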
arXiv:2410.01609 (https://arxiv.org/abs/2410.01609)  [pdf, other]  cs.CV (Computer Vision and Pattern Recognition)

DAViD: Domain Adaptive Visually-Rich Document Understanding with Synthetic Insights

Authors: Yihao Ding, Soyeon Caren Han, Zechuan Li, Hyunsuk Chung

Abstract: Visually-Rich Documents (VRDs), encompassing elements like charts, tables, and references, convey complex information across various fields. However, extracting information from these rich documents is labor-intensive, especially given their inconsistent formats and domain-specific requirements.
While pretrained models for VRD Understanding have progressed, their reliance on large, annotated datasets limits scalability. This paper introduces the Domain Adaptive Visually-rich Document Understanding (DAViD) framework, which utilises machine-generated synthetic data for domain adaptation. DAViD integrates fine-grained and coarse-grained document representation learning and employs synthetic annotations to reduce the need for costly manual labelling. By leveraging pretrained models and synthetic data, DAViD achieves competitive performance with minimal annotated datasets. Extensive experiments validate DAViD's effectiveness, demonstrating its ability to efficiently adapt to domain-specific VRDU tasks.

Submitted 2 October, 2024; originally announced October 2024.
Comments: Work in progress

arXiv:2410.00846 (https://arxiv.org/abs/2410.00846)  [pdf, other]  cs.DB (Databases)

Why Are Learned Indexes So Effective but Sometimes Ineffective?
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Liu%2C+Q">Qiyu Liu</a>, <a href="/search/?searchtype=author&amp;query=Han%2C+S">Siyuan Han</a>, <a href="/search/?searchtype=author&amp;query=Qi%2C+Y">Yanlin Qi</a>, <a href="/search/?searchtype=author&amp;query=Peng%2C+J">Jingshu Peng</a>, <a href="/search/?searchtype=author&amp;query=Li%2C+J">Jin Li</a>, <a href="/search/?searchtype=author&amp;query=Lin%2C+L">Longlong Lin</a>, <a href="/search/?searchtype=author&amp;query=Chen%2C+L">Lei Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00846v1-abstract-short" style="display: inline;"> Learned indexes have attracted significant research interest due to their ability to offer better space-time trade-offs compared to traditional B+-tree variants. Among various learned indexes, the PGM-Index based on error-bounded piecewise linear approximation is an elegant data structure that has demonstrated \emph{provably} superior performance over conventional B+-tree indexes. In this paper, w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00846v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00846v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00846v1-abstract-full" style="display: none;"> Learned indexes have attracted significant research interest due to their ability to offer better space-time trade-offs compared to traditional B+-tree variants. Among various learned indexes, the PGM-Index based on error-bounded piecewise linear approximation is an elegant data structure that has demonstrated \emph{provably} superior performance over conventional B+-tree indexes. In this paper, we explore two interesting research questions regarding the PGM-Index: (a) \emph{Why are PGM-Indexes theoretically effective?} and (b) \emph{Why do PGM-Indexes underperform in practice?} For question~(a), we first prove that, for a set of $N$ sorted keys, the PGM-Index can, with high probability, achieve a lookup time of $O(\log\log N)$ while using $O(N)$ space. To the best of our knowledge, this is the \textbf{tightest bound} for learned indexes to date. For question~(b), we identify that querying PGM-Indexes is highly memory-bound, where the internal error-bounded search operations often become the bottleneck. To fill the performance gap, we propose PGM++, a \emph{simple yet effective} extension to the original PGM-Index that employs a mixture of different search strategies, with hyper-parameters automatically tuned through a calibrated cost model. Extensive experiments on real workloads demonstrate that PGM++ establishes a new Pareto frontier. At comparable space costs, PGM++ speeds up index lookup queries by up to $\mathbf{2.31\times}$ and $\mathbf{1.56\times}$ when compared to the original PGM-Index and state-of-the-art learned indexes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00846v1-abstract-full').style.display = 'none'; document.getElementById('2410.00846v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Han%2C+S&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> 
<div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
