Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 801 results for author: <span class="mathjax">Lu, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Lu%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Lu, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lu%2C+X&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lu, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lu%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lu%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lu%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Lu%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Lu%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Lu%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11504">arXiv:2411.11504</a> <span> [<a href="https://arxiv.org/pdf/2411.11504">pdf</a>, <a href="https://arxiv.org/format/2411.11504">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Search, Verify and Feedback: Towards Next Generation Post-training Paradigm of Foundation Models via Verifier Engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guan%2C+X">Xinyan Guan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yanjiang Liu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinyu Lu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+B">Boxi Cao</a>, <a href="/search/cs?searchtype=author&query=He%2C+B">Ben He</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xianpei Han</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Le Sun</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+J">Jie Lou</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bowen Yu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yaojie Lu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongyu Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2411.11504v1-abstract-short" style="display: inline;"> The evolution of machine learning has increasingly prioritized the development of powerful models and more scalable supervision signals. However, the emergence of foundation models presents significant challenges in providing effective supervision signals necessary for further enhancing their capabilities. Consequently, there is an urgent need to explore novel supervision signals and technical app… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11504v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11504v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11504v1-abstract-full" style="display: none;"> The evolution of machine learning has increasingly prioritized the development of powerful models and more scalable supervision signals. However, the emergence of foundation models presents significant challenges in providing effective supervision signals necessary for further enhancing their capabilities. Consequently, there is an urgent need to explore novel supervision signals and technical approaches. In this paper, we propose verifier engineering, a novel post-training paradigm specifically designed for the era of foundation models. The core of verifier engineering involves leveraging a suite of automated verifiers to perform verification tasks and deliver meaningful feedback to foundation models. We systematically categorize the verifier engineering process into three essential stages: search, verify, and feedback, and provide a comprehensive review of state-of-the-art research developments within each stage. We believe that verifier engineering constitutes a fundamental pathway toward achieving Artificial General Intelligence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11504v1-abstract-full').style.display = 'none'; document.getElementById('2411.11504v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
2. arXiv:2411.10640 [pdf, other] (cs.CV, cs.CL)
BlueLM-V-3B: Algorithm and System Co-Design for Multimodal Large Language Models on Mobile Devices
Authors: Xudong Lu, Yinghao Chen, Cheng Chen, Hui Tan, Boheng Chen, Yina Xie, Rui Hu, Guanxin Tan, Renshou Wu, Yan Hu, Yi Zeng, Lei Wu, Liuyang Bian, Zhaoxiong Wang, Long Liu, Yanzhou Yang, Han Xiao, Aojun Zhou, Yafei Wen, Xiaoxin Chen, Shuai Ren, Hongsheng Li
Abstract: The emergence and growing popularity of multimodal large language models (MLLMs) have significant potential to enhance various aspects of daily life, from improving communication to facilitating learning and problem-solving.
Mobile phones, as essential daily companions, represent the most effective and accessible deployment platform for MLLMs, enabling seamless integration into everyday tasks. However, deploying MLLMs on mobile phones is challenging due to limited memory and computational capability, making it difficult to achieve smooth, real-time processing without extensive optimization. In this paper, we present BlueLM-V-3B, an algorithm and system co-design approach tailored to the efficient deployment of MLLMs on mobile platforms. Specifically, we redesign the dynamic resolution scheme adopted by mainstream MLLMs and implement system optimizations for hardware-aware deployment to speed up model inference on mobile phones. BlueLM-V-3B has three key highlights: (1) small size: a language model with 2.7B parameters and a vision encoder with 400M parameters; (2) fast speed: a generation speed of 24.4 tokens/s on the MediaTek Dimensity 9300 processor with 4-bit LLM weight quantization; (3) strong performance: the highest average score (66.1) on the OpenCompass benchmark among models with ≤ 4B parameters, surpassing a series of models with much larger parameter counts (e.g., MiniCPM-V-2.6, InternVL2-8B).
Submitted 15 November, 2024; originally announced November 2024.
Comments: 21 pages.

3. arXiv:2411.09439 [pdf, other] (cs.CV)
Spider: Any-to-Many Multimodal LLM
Authors: Jinxiang Lai, Jie Zhang, Jun Liu, Jian Li, Xiaocheng Lu, Song Guo
Abstract: Multimodal LLMs (MLLMs) have emerged as an extension of Large Language Models (LLMs), enabling the integration of various modalities. However, Any-to-Any MLLMs are limited to generating pairwise modalities 'Text + X' within a single response, such as Text + {Image or Audio or Video}.
To address this limitation, we introduce Spider, a novel, efficient Any-to-Many Modalities Generation (AMMG) framework that can generate an arbitrary combination of modalities 'Text + Xs', such as Text + {Image and Audio and Video}. To achieve efficient AMMG, Spider integrates three core components: a Base Model for basic X-to-X (i.e., Any-to-Any) modality processing; a novel Efficient Decoders-Controller that directs multimodal decoders to generate Xs (many-modal) contents; and an Any-to-Many Instruction Template designed for producing Xs signal prompts. To train Spider, we constructed a novel Text-formatted Many-Modal (TMM) dataset, which facilitates learning the X-to-Xs (i.e., Any-to-Many) capability necessary for AMMG. Ultimately, the well-trained Spider generates a pseudo X-to-Xs dataset, the first-ever X-to-Xs many-modal dataset, enhancing the potential of the AMMG task for future research. Overall, this work not only pushes the boundary of multimodal interaction but also provides rich data support for advancing the field.
Submitted 14 November, 2024; originally announced November 2024.
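To make the any-to-many idea concrete, here is a toy sketch of a controller that parses per-modality signal prompts out of one LLM response and dispatches each to a modality decoder. The <img>/<aud>/<vid> tag format and all names are invented for illustration; they are not Spider's actual interface.

```python
import re

DECODERS = {  # stand-ins for image/audio/video generation decoders
    "img": lambda prompt: f"[image generated from: {prompt}]",
    "aud": lambda prompt: f"[audio generated from: {prompt}]",
    "vid": lambda prompt: f"[video generated from: {prompt}]",
}

def decode_many(llm_output: str) -> dict:
    """Extract signal prompts like <img>a red fox</img> and run each decoder,
    so a single response yields Text + {Image and Audio and Video}."""
    outputs = {"text": re.sub(r"<(img|aud|vid)>.*?</\1>", "", llm_output).strip()}
    for tag, prompt in re.findall(r"<(img|aud|vid)>(.*?)</\1>", llm_output):
        outputs[tag] = DECODERS[tag](prompt)
    return outputs

print(decode_many("A fox. <img>a red fox</img> <aud>fox sounds</aud>"))
```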
4. arXiv:2411.08453 [pdf] (cs.CV)
Biomass phenotyping of oilseed rape through UAV multi-view oblique imaging with 3DGS and SAM model
Authors: Yutao Shen, Hongyu Zhou, Xin Yang, Xuqi Lu, Ziyue Guo, Lixi Jiang, Yong He, Haiyan Cen
Abstract: Biomass estimation of oilseed rape is crucial for optimizing crop productivity and breeding strategies. While UAV-based imaging has advanced high-throughput phenotyping, current methods often rely on orthophoto images, which struggle with overlapping leaves and incomplete structural information in complex field environments. This study integrates 3D Gaussian Splatting (3DGS) with the Segment Anything Model (SAM) for precise 3D reconstruction and biomass estimation of oilseed rape. UAV multi-view oblique images from 36 angles were used to perform 3D reconstruction, with the SAM module enhancing point cloud segmentation. The segmented point clouds were then converted into point cloud volumes, which were fitted to ground-measured biomass using linear regression. The results showed that 3DGS (7k and 30k iterations) provided high accuracy, with peak signal-to-noise ratios (PSNR) of 27.43 and 29.53 and training times of 7 and 49 minutes, respectively. This performance exceeded that of structure from motion (SfM) and mipmap Neural Radiance Fields (Mip-NeRF), demonstrating superior efficiency. The SAM module achieved high segmentation accuracy, with a mean intersection over union (mIoU) of 0.961 and an F1-score of 0.980.
Additionally, a comparison of biomass extraction models found the point cloud volume model to be the most accurate, with a coefficient of determination (R²) of 0.976, a root mean square error (RMSE) of 2.92 g/plant, and a mean absolute percentage error (MAPE) of 6.81%, outperforming both the plot crop volume and individual crop volume models. This study highlights the potential of combining 3DGS with multi-view UAV imaging for improved biomass phenotyping.
Submitted 13 November, 2024; originally announced November 2024.
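The volume-to-biomass step is plain least squares plus the three reported error metrics, which is easy to reproduce. A sketch follows; the numbers are made-up placeholders, not data from the paper.

```python
import numpy as np

volume = np.array([1.2, 2.5, 3.1, 4.8, 5.0, 6.3])         # point cloud volume per plant
biomass = np.array([10.1, 19.8, 25.2, 38.9, 40.3, 50.7])   # ground-measured biomass, g/plant

slope, intercept = np.polyfit(volume, biomass, 1)  # linear regression fit
pred = slope * volume + intercept

ss_res = np.sum((biomass - pred) ** 2)
ss_tot = np.sum((biomass - biomass.mean()) ** 2)
r2 = 1 - ss_res / ss_tot                                   # coefficient of determination
rmse = np.sqrt(np.mean((biomass - pred) ** 2))             # root mean square error
mape = np.mean(np.abs((biomass - pred) / biomass)) * 100   # mean absolute percentage error

print(f"R2={r2:.3f}, RMSE={rmse:.2f} g/plant, MAPE={mape:.2f}%")
```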
5. arXiv:2411.07037 [pdf, other] (cs.CL)
LIFBench: Evaluating the Instruction Following Performance and Stability of Large Language Models in Long-Context Scenarios
Authors: Xiaodong Wu, Minhao Wang, Yichen Liu, Xiaoming Shi, He Yan, Xiangju Lu, Junmin Zhu, Wei Zhang
Abstract: As Large Language Models (LLMs) continue to advance in natural language processing (NLP), their ability to stably follow instructions in long-context inputs has become crucial for real-world applications. While existing benchmarks assess various LLM capabilities, they rarely focus on instruction-following in long-context scenarios or on stability across different inputs. In response, we introduce the Long-context Instruction-Following Benchmark (LIFBench), a scalable dataset designed to evaluate LLMs' instruction-following capabilities and stability across long contexts. LIFBench comprises three long-context scenarios and eleven diverse tasks, supported by 2,766 instructions generated through an automated expansion method across three dimensions: length, expression, and variables. For evaluation, we propose LIFEval, a rubric-based assessment framework that provides precise, automated scoring of complex LLM responses without relying on LLM-assisted evaluations or human judgment. This approach facilitates a comprehensive analysis of model performance and stability from various perspectives. We conduct extensive experiments on 20 notable LLMs across six length intervals, analyzing their instruction-following capabilities and stability. Our work contributes LIFBench and LIFEval as robust tools for assessing LLM performance in complex, long-context settings, providing insights that can inform future LLM development.
Submitted 11 November, 2024; originally announced November 2024.
Comments: 17 pages, 3 figures.
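Rubric-based scoring of the kind LIFEval describes can be pictured as weighted deterministic checks, so no LLM judge is needed at evaluation time. The sketch below is a guess at the general shape; the rubric items and weights are invented examples, not LIFBench's actual rubrics.

```python
from typing import Callable, List, Tuple

Rubric = List[Tuple[str, float, Callable[[str], bool]]]  # (name, weight, check)

def score_response(response: str, rubric: Rubric) -> float:
    """Return the weighted fraction of rubric checks the response passes."""
    total = sum(weight for _, weight, _ in rubric)
    earned = sum(weight for _, weight, check in rubric if check(response))
    return earned / total

example_rubric: Rubric = [
    ("mentions_section_3", 1.0, lambda r: "Section 3" in r),
    ("under_100_words",    0.5, lambda r: len(r.split()) < 100),
    ("is_bullet_list",     0.5, lambda r: r.lstrip().startswith("-")),
]

print(score_response("- Section 3 covers the ablations.", example_rubric))  # 1.0
```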
6. arXiv:2411.06224 [pdf, other] (cs.GR, cs.PF)
Advancing GPU IPC for stiff affine-deformable simulation
Authors: Kemeng Huang, Xinyu Lu, Huancheng Lin, Taku Komura, Minchen Li
Abstract: Incremental Potential Contact (IPC) is a widely used, robust, and accurate method for simulating complex frictional contact behaviors. However, achieving high efficiency remains a major challenge, particularly as material stiffness increases, which leads to slower Preconditioned Conjugate Gradient (PCG) convergence, even with state-of-the-art preconditioners. In this paper, we propose a fully GPU-optimized IPC simulation framework capable of handling materials across a wide range of stiffnesses, delivering consistently high performance and scalability with up to 10x speedup over state-of-the-art GPU IPC methods. Our framework introduces three key innovations: (1) a novel connectivity-enhanced Multilevel Additive Schwarz (MAS) preconditioner on the GPU, designed to efficiently capture both stiff and soft elastodynamics and improve PCG convergence at reduced preconditioning cost; (2) a C2-continuous cubic energy with an analytic eigensystem for strain limiting, enabling more parallel-friendly simulation of stiff membranes, such as cloth, without membrane locking; (3) for extremely stiff behaviors where elastic waves are barely visible, affine body dynamics (ABD) with a hash-based multi-layer reduction strategy for fast Hessian assembly and efficient affine-deformable coupling. We conduct extensive performance analyses and benchmark studies to compare our framework against state-of-the-art methods and alternative design choices. Our system consistently delivers the fastest performance across soft, stiff, and hybrid simulation scenarios, even in cases with high resolution, large deformations, and high-speed impacts. Our framework will be fully open-sourced upon acceptance.
Submitted 9 November, 2024 (v1); revised 12 November, 2024 (v2); originally announced November 2024.
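For context, the solver whose convergence a preconditioner like MAS accelerates is standard preconditioned conjugate gradient. Here is the textbook algorithm with a simple Jacobi (diagonal) preconditioner, not the paper's GPU implementation; stiff systems are exactly where such a weak preconditioner degrades and stronger ones pay off.

```python
import numpy as np

def pcg(A, b, tol=1e-8, max_iter=1000):
    """Preconditioned conjugate gradient for SPD A with M = diag(A) (Jacobi)."""
    M_inv = 1.0 / np.diag(A)
    x = np.zeros_like(b)
    r = b - A @ x                      # residual
    z = M_inv * r                      # preconditioned residual
    p = z.copy()                       # search direction
    rz = r @ z
    for i in range(max_iter):
        Ap = A @ p
        alpha = rz / (p @ Ap)
        x += alpha * p
        r -= alpha * Ap
        if np.linalg.norm(r) < tol:
            return x, i + 1
        z = M_inv * r
        rz_new = r @ z
        p = z + (rz_new / rz) * p      # conjugate direction update
        rz = rz_new
    return x, max_iter

A = np.array([[4.0, 1.0], [1.0, 3.0]])  # small SPD test system
b = np.array([1.0, 2.0])
print(pcg(A, b))
```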
7. arXiv:2411.06197 [pdf, other] (cs.CV)
Multi-object Tracking by Detection and Query: an efficient end-to-end manner
Authors: Shukun Jia, Yichao Cao, Feng Yang, Xin Lu, Xiaobo Lu
Abstract: Multi-object tracking is advancing through two dominant paradigms: traditional tracking by detection and newly emerging tracking by query. In this work, we fuse them together and propose the tracking-by-detection-and-query paradigm, which is achieved by a Learnable Associator. Specifically, a basic information interaction module and a content-position alignment module are proposed for thorough information Interaction among object queries, and tracking results are directly Decoded from these queries; hence, we name the method LAID. Compared to tracking-by-query models, LAID achieves competitive tracking accuracy with notably higher training efficiency. With regard to tracking-by-detection methods, experimental results on DanceTrack show that LAID significantly surpasses the state-of-the-art heuristic method by 3.9% on the HOTA metric and 6.1% on the IDF1 metric. On SportsMOT, LAID also achieves the best HOTA score. By offering low training cost, strong tracking capability, and an elegant end-to-end design all at once, LAID presents a forward-looking direction for the field.
Submitted 9 November, 2024; originally announced November 2024.
8. arXiv:2411.05783 [pdf, other] (cs.CL, cs.AI, cs.CV, cs.HC)
ASL STEM Wiki: Dataset and Benchmark for Interpreting STEM Articles
Authors: Kayo Yin, Chinmay Singh, Fyodor O. Minakov, Vanessa Milan, Hal Daumé III, Cyril Zhang, Alex X. Lu, Danielle Bragg
Abstract: Deaf and hard-of-hearing (DHH) students face significant barriers in accessing science, technology, engineering, and mathematics (STEM) education, notably due to the scarcity of STEM resources in signed languages. To help address this, we introduce ASL STEM Wiki: a parallel corpus of 254 Wikipedia articles on STEM topics in English, interpreted into over 300 hours of American Sign Language (ASL). ASL STEM Wiki is the first continuous signing dataset focused on STEM, facilitating the development of AI resources for STEM education in ASL. We identify several use cases of ASL STEM Wiki with human-centered applications. For example, because this dataset highlights the frequent use of fingerspelling for technical concepts, which inhibits DHH students' ability to learn, we develop models to identify fingerspelled words -- which can later be used to query for appropriate ASL signs to suggest to interpreters.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05783v1-abstract-full').style.display = 'none'; document.getElementById('2411.05783v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04799">arXiv:2411.04799</a> <span> [<a href="https://arxiv.org/pdf/2411.04799">pdf</a>, <a href="https://arxiv.org/format/2411.04799">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Kwai-STaR: Transform LLMs into State-Transition Reasoners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yuhang Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Changyi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianke Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhenyu Yang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhixiang Ding</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+S">Shengsheng Qian</a>, <a href="/search/cs?searchtype=author&query=Du%2C+M">Meng Du</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+R">Ruiwen Kang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+K">Kaiyu Tang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+T">Tingting Gao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Di Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Hai-Tao Zheng</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+B">Bin Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04799v2-abstract-short" style="display: inline;"> Mathematical reasoning presents a significant challenge to the cognitive capabilities of LLMs. Various methods have been proposed to enhance the mathematical ability of LLMs. However, few recognize the value of state transition for LLM reasoning. In this work, we define mathematical problem-solving as a process of transiting from an initial unsolved state to the final resolved state, and propose K… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04799v2-abstract-full').style.display = 'inline'; document.getElementById('2411.04799v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04799v2-abstract-full" style="display: none;"> Mathematical reasoning presents a significant challenge to the cognitive capabilities of LLMs. 
Various methods have been proposed to enhance the mathematical ability of LLMs. However, few recognize the value of state transition for LLM reasoning. In this work, we define mathematical problem-solving as a process of transitioning from an initial unsolved state to the final resolved state, and propose the Kwai-STaR framework, which transforms LLMs into State-Transition Reasoners to improve their intuitive reasoning capabilities. Our approach comprises three main steps: (1) define a state space tailored to mathematical reasoning; (2) generate state-transition data based on the state space; (3) convert original LLMs into State-Transition Reasoners via a curricular training strategy. Our experiments validate the effectiveness of Kwai-STaR in enhancing mathematical reasoning: after training on the small-scale Kwai-STaR dataset, general LLMs, including Mistral-7B and LLaMA-3, achieve considerable performance gains on the GSM8K and GSM-Hard datasets. Additionally, the state-transition-based design endows Kwai-STaR with remarkable training and inference efficiency. Further experiments are underway to establish the generality of Kwai-STaR.
Submitted 7 November, 2024 (v1); revised 12 November, 2024 (v2); originally announced November 2024.
Comments: 6 pages, 2 figures.
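A state-transition view of problem-solving suggests training records shaped like a trace from an unsolved state to a resolved one. The schema below is an illustrative assumption about what such data might look like, not Kwai-STaR's actual format.

```python
from dataclasses import dataclass
from typing import List

@dataclass
class Transition:
    state: str    # what is known or derived so far
    action: str   # the reasoning step applied to advance the state

@dataclass
class StateTransitionTrace:
    problem: str
    transitions: List[Transition]
    final_answer: str

trace = StateTransitionTrace(
    problem="Tom has 3 boxes of 12 apples and gives away 7. How many remain?",
    transitions=[
        Transition("unsolved: total apple count unknown", "compute 3 * 12 = 36"),
        Transition("total = 36; 7 given away", "compute 36 - 7 = 29"),
    ],
    final_answer="29",
)
```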
10. arXiv:2411.00632 [pdf, other] (cs.CV, cs.LG)
PCoTTA: Continual Test-Time Adaptation for Multi-Task Point Cloud Understanding
Authors: Jincen Jiang, Qianyu Zhou, Yuhang Li, Xinkui Zhao, Meili Wang, Lizhuang Ma, Jian Chang, Jian Jun Zhang, Xuequan Lu
Abstract: In this paper, we present PCoTTA, a pioneering framework for Continual Test-Time Adaptation (CoTTA) in multi-task point cloud understanding, enhancing the model's transferability towards the continually changing target domain. We introduce a multi-task setting for PCoTTA, which is practical and realistic, handling multiple tasks within one unified model during continual adaptation. Our PCoTTA involves three key components: automatic prototype mixture (APM), Gaussian Splatted feature shifting (GSFS), and contrastive prototype repulsion (CPR). First, APM automatically mixes the source prototypes with the learnable prototypes using a similarity balancing factor, avoiding catastrophic forgetting. Then, GSFS dynamically shifts the testing sample toward the source domain, mitigating error accumulation in an online manner. In addition, CPR pulls the nearest learnable prototype close to the testing feature and pushes it away from other prototypes, making each prototype distinguishable during adaptation. Experimental comparisons lead to a new benchmark, demonstrating PCoTTA's superiority in boosting the model's transferability towards the continually changing target domain.
Submitted 1 November, 2024; originally announced November 2024.
Comments: Accepted to NeurIPS 2024.
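Of the three components, APM is the most self-contained: mix source and learnable prototypes with a similarity-derived balancing factor. The sketch below shows that shape; the exact form of the balancing factor and all tensor shapes are assumptions for illustration, not the paper's precise formulation.

```python
import torch
import torch.nn.functional as F

def automatic_prototype_mixture(source_protos, learnable_protos, feature):
    """source_protos, learnable_protos: (K, D); feature: (D,).
    Mix prototypes per row, biased toward whichever set is more similar
    to the test feature, which helps retain source knowledge."""
    sim_src = F.cosine_similarity(source_protos, feature.unsqueeze(0), dim=1)
    sim_lrn = F.cosine_similarity(learnable_protos, feature.unsqueeze(0), dim=1)
    alpha = torch.sigmoid(sim_src - sim_lrn).unsqueeze(1)  # balancing factor, (K, 1)
    return alpha * source_protos + (1 - alpha) * learnable_protos

K, D = 8, 256
mixed = automatic_prototype_mixture(torch.randn(K, D), torch.randn(K, D), torch.randn(D))
print(mixed.shape)  # torch.Size([8, 256])
```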
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22394">arXiv:2410.22394</a> <span> [<a href="https://arxiv.org/pdf/2410.22394">pdf</a>, <a href="https://arxiv.org/format/2410.22394">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AAAR-1.0: Assessing AI's Potential to Assist Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lou%2C+R">Renze Lou</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hanzi Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sijia Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Jiangshu Du</a>, <a href="/search/cs?searchtype=author&query=Kamoi%2C+R">Ryo Kamoi</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaoxin Lu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jian Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuxuan Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yusen Zhang</a>, <a href="/search/cs?searchtype=author&query=Ahn%2C+J+J">Jihyun Janice Ahn</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+H">Hongchao Fang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+Z">Zhuoyang Zou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wenchao Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xi Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kai Zhang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+C">Congying Xia</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Lifu Huang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+W">Wenpeng Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22394v1-abstract-short" style="display: inline;"> Numerous studies have assessed the proficiency of AI systems, particularly large language models (LLMs), in facilitating everyday tasks such as email writing, question answering, and creative content generation. However, researchers face unique challenges and opportunities in leveraging LLMs for their own work, such as brainstorming research ideas, designing experiments, and writing or reviewing p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22394v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22394v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22394v1-abstract-full" style="display: none;"> Numerous studies have assessed the proficiency of AI systems, particularly large language models (LLMs), in facilitating everyday tasks such as email writing, question answering, and creative content generation. However, researchers face unique challenges and opportunities in leveraging LLMs for their own work, such as brainstorming research ideas, designing experiments, and writing or reviewing papers. 
In this study, we introduce AAAR-1.0, a benchmark dataset designed to evaluate LLM performance on four fundamental, expertise-intensive research tasks: (i) EquationInference, assessing the correctness of equations based on the contextual information in paper submissions; (ii) ExperimentDesign, designing experiments to validate research ideas and solutions; (iii) PaperWeakness, identifying weaknesses in paper submissions; and (iv) ReviewCritique, identifying whether each segment of a human review is deficient or not. AAAR-1.0 differs from prior benchmarks in two key ways: first, it is explicitly research-oriented, with tasks requiring deep domain expertise; second, it is researcher-oriented, mirroring the primary activities that researchers engage in on a daily basis. An evaluation of both open-source and proprietary LLMs reveals their potential as well as their limitations in conducting sophisticated research tasks. We will keep iterating AAAR-1.0 to new versions.
Submitted 29 October, 2024; originally announced October 2024.
Comments: Project Webpage: https://renzelou.github.io/AAAR-1.0/

arXiv:2410.21027 [pdf, other] cs.LG cs.CL
Transferable Post-training via Inverse Value Learning
Authors: Xinyu Lu, Xueru Wen, Yaojie Lu, Bowen Yu, Hongyu Lin, Haiyang Yu, Le Sun, Xianpei Han, Yongbin Li
Abstract: As post-training processes utilize increasingly large datasets and base models continue to grow in size, the computational demands and implementation challenges of existing algorithms are escalating significantly. In this paper, we propose modeling the changes at the logits level during post-training using a separate neural network (i.e., the value network).
After training this network on a small base model using demonstrations, it can be seamlessly integrated with other pre-trained models during inference, enabling them to achieve similar capability enhancements. We systematically investigate the best practices for this paradigm in terms of pre-training weights and connection schemes. We demonstrate that the resulting value network has broad transferability: across pre-trained models of different parameter sizes within the same family, across models undergoing continuous pre-training within the same family, and across models with different vocabularies from different families. In certain cases, it can achieve performance comparable to full-parameter fine-tuning. Furthermore, we explore methods to enhance the transferability of the value model and to prevent overfitting to the base model used during training.
Submitted 28 October, 2024; originally announced October 2024.
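A minimal sketch of the composition idea, assuming the value network outputs additive logit corrections over a vocabulary shared with the base model; both callables and their interface are hypothetical stand-ins, not the paper's connection scheme.

    import torch

    @torch.no_grad()
    def composed_logits(base_model, value_net, input_ids):
        # Hypothetical interface: each callable maps token ids to vocabulary logits.
        base = base_model(input_ids)   # (batch, vocab) logits of any frozen base model
        delta = value_net(input_ids)   # (batch, vocab) logit shift learned in post-training
        # The value network is trained once against a small base model and then
        # reused: adding its logit shift steers larger models at inference time.
        return base + delta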
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14268">arXiv:2410.14268</a> <span> [<a href="https://arxiv.org/pdf/2410.14268">pdf</a>, <a href="https://arxiv.org/format/2410.14268">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MoDification: Mixture of Depths Made Easy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+M">Meizhi Zhong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qimeng Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xuantao Lu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Z">Zheyu Ye</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chengqiang Lu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yan Gao</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yao Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kehai Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Min Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+D">Dawei Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14268v1-abstract-short" style="display: inline;"> Long-context efficiency has recently become a trending topic in serving large language models (LLMs). And mixture of depths (MoD) is proposed as a perfect fit to bring down both latency and memory. In this paper, however, we discover that MoD can barely transform existing LLMs without costly training over an extensive number of tokens. To enable the transformations from any LLMs to MoD ones, we sh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14268v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14268v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14268v1-abstract-full" style="display: none;"> Long-context efficiency has recently become a trending topic in serving large language models (LLMs). And mixture of depths (MoD) is proposed as a perfect fit to bring down both latency and memory. In this paper, however, we discover that MoD can barely transform existing LLMs without costly training over an extensive number of tokens. To enable the transformations from any LLMs to MoD ones, we showcase top-k operator in MoD should be promoted to threshold-p operator, and refinement to architecture and data should also be crafted along. All these designs form our method termed MoDification. Through a comprehensive set of experiments covering model scales from 3B to 70B, we exhibit MoDification strikes an excellent balance between efficiency and effectiveness. MoDification can achieve up to ~1.2x speedup in latency and ~1.8x reduction in memory compared to original LLMs especially in long-context applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14268v1-abstract-full').style.display = 'none'; document.getElementById('2410.14268v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 9 figures, 5 tables, work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13786">arXiv:2410.13786</a> <span> [<a href="https://arxiv.org/pdf/2410.13786">pdf</a>, <a href="https://arxiv.org/format/2410.13786">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Emphasizing Semantic Consistency of Salient Posture for Speech-Driven Gesture Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fengqi Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hexiang Wang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Jingyu Gong</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+R">Ran Yi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qianyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xuequan Lu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiangbo Lu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lizhuang Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13786v1-abstract-short" style="display: inline;"> Speech-driven gesture generation aims at synthesizing a gesture sequence synchronized with the input speech signal. Previous methods leverage neural networks to directly map a compact audio representation to the gesture sequence, ignoring the semantic association of different modalities and failing to deal with salient gestures. In this paper, we propose a novel speech-driven gesture generation me… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13786v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13786v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13786v1-abstract-full" style="display: none;"> Speech-driven gesture generation aims at synthesizing a gesture sequence synchronized with the input speech signal. Previous methods leverage neural networks to directly map a compact audio representation to the gesture sequence, ignoring the semantic association of different modalities and failing to deal with salient gestures. In this paper, we propose a novel speech-driven gesture generation method by emphasizing the semantic consistency of salient posture. 
arXiv:2410.13213 [pdf, other] cs.AI cs.LG
LLMOPT: Learning to Define and Solve General Optimization Problems from Scratch
Authors: Caigao Jiang, Xiang Shu, Hong Qian, Xingyu Lu, Jun Zhou, Aimin Zhou, Yang Yu
Abstract: Optimization problems are prevalent across various scenarios.
Formulating and then solving optimization problems described in natural language often requires highly specialized human expertise, which can block the widespread application of optimization-based decision making. To automate problem formulation and solving, leveraging large language models (LLMs) has emerged as a potential approach. However, this approach suffers from the issue of optimization generalization: the accuracy of most current LLM-based methods, and the generality of the optimization problem types they can model, remain limited. In this paper, we propose a unified learning-based framework called LLMOPT to boost optimization generalization. Starting from the natural language descriptions of optimization problems and a pre-trained LLM, LLMOPT constructs the introduced five-element formulation as a universal model for learning to define diverse optimization problem types. LLMOPT then employs multi-instruction tuning to enhance the accuracy and generality of both problem formalization and solver code generation. After that, to prevent hallucinations in LLMs, such as sacrificing solving accuracy to avoid execution errors, a model alignment and self-correction mechanism is adopted in LLMOPT. We evaluate the optimization generalization ability of LLMOPT and compared methods across six real-world datasets covering roughly 20 fields such as health, environment, energy, and manufacturing. Extensive experimental results show that LLMOPT is able to model various optimization problem types such as linear/nonlinear programming, mixed integer programming, and combinatorial optimization, and achieves a notable 11.08% average improvement in solving accuracy over state-of-the-art methods. The code is available at https://github.com/caigaojiang/LLMOPT.
Submitted 17 October, 2024; originally announced October 2024.
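The self-correction step might look like the following loop, where llm and run_sandboxed are hypothetical stand-ins for the tuned model and a code executor; the paper's actual five-element formulation and alignment pipeline are more involved than this sketch.

    def formulate_and_solve(llm, run_sandboxed, problem_text, max_rounds=3):
        # llm(prompt) -> str and run_sandboxed(code) -> (ok, output) are
        # hypothetical interfaces, not LLMOPT's API.
        formulation = llm("Define the optimization problem:\n" + problem_text)
        code = llm("Write solver code for this formulation:\n" + formulation)
        for _ in range(max_rounds):
            ok, output = run_sandboxed(code)
            if ok:
                return output
            # Self-correction: feed the execution error back for a revised attempt.
            code = llm("The solver code failed with:\n" + output +
                       "\nRevise the code:\n" + code)
        return None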
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11908">arXiv:2410.11908</a> <span> [<a href="https://arxiv.org/pdf/2410.11908">pdf</a>, <a href="https://arxiv.org/format/2410.11908">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ChatHouseDiffusion: Prompt-Guided Generation and Editing of Floor Plans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qin%2C+S">Sizhong Qin</a>, <a href="/search/cs?searchtype=author&query=He%2C+C">Chengyu He</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qiaoyun Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sen Yang</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+W">Wenjie Liao</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yi Gu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinzheng Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11908v1-abstract-short" style="display: inline;"> The generation and editing of floor plans are critical in architectural planning, requiring a high degree of flexibility and efficiency. Existing methods demand extensive input information and lack the capability for interactive adaptation to user modifications. This paper introduces ChatHouseDiffusion, which leverages large language models (LLMs) to interpret natural language input, employs graph… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11908v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11908v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11908v1-abstract-full" style="display: none;"> The generation and editing of floor plans are critical in architectural planning, requiring a high degree of flexibility and efficiency. Existing methods demand extensive input information and lack the capability for interactive adaptation to user modifications. This paper introduces ChatHouseDiffusion, which leverages large language models (LLMs) to interpret natural language input, employs graphormer to encode topological relationships, and uses diffusion models to flexibly generate and edit floor plans. This approach allows iterative design adjustments based on user ideas, significantly enhancing design efficiency. Compared to existing models, ChatHouseDiffusion achieves higher Intersection over Union (IoU) scores, permitting precise, localized adjustments without the need for complete redesigns, thus offering greater practicality. Experiments demonstrate that our model not only strictly adheres to user specifications but also facilitates a more intuitive design process through its interactive capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11908v1-abstract-full').style.display = 'none'; document.getElementById('2410.11908v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10700">arXiv:2410.10700</a> <span> [<a href="https://arxiv.org/pdf/2410.10700">pdf</a>, <a href="https://arxiv.org/format/2410.10700">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Derail Yourself: Multi-turn LLM Jailbreak Attack through Self-discovered Clues </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+Q">Qibing Ren</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dongrui Liu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhanxu Xie</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaoya Lu</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&query=Sha%2C+L">Lei Sha</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Junchi Yan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lizhuang Ma</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+J">Jing Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10700v1-abstract-short" style="display: inline;"> This study exposes the safety vulnerabilities of Large Language Models (LLMs) in multi-turn interactions, where malicious users can obscure harmful intents across several queries. We introduce ActorAttack, a novel multi-turn attack method inspired by actor-network theory, which models a network of semantically linked actors as attack clues to generate diverse and effective attack paths toward harm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10700v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10700v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10700v1-abstract-full" style="display: none;"> This study exposes the safety vulnerabilities of Large Language Models (LLMs) in multi-turn interactions, where malicious users can obscure harmful intents across several queries. We introduce ActorAttack, a novel multi-turn attack method inspired by actor-network theory, which models a network of semantically linked actors as attack clues to generate diverse and effective attack paths toward harmful targets. 
ActorAttack addresses two main challenges in multi-turn attacks: (1) concealing harmful intents by creating an innocuous conversation topic about the actor, and (2) uncovering diverse attack paths towards the same harmful target by leveraging LLMs' knowledge to specify the correlated actors as various attack clues. In this way, ActorAttack outperforms existing single-turn and multi-turn attack methods across advanced aligned LLMs, even GPT-o1. We will publish a dataset called SafeMTData, which includes multi-turn adversarial prompts and safety alignment data generated by ActorAttack. We demonstrate that models safety-tuned on our safety dataset are more robust to multi-turn attacks. Code is available at https://github.com/renqibing/ActorAttack.
Submitted 14 October, 2024; originally announced October 2024.

arXiv:2410.09374 [pdf, other] cs.CV cs.RO
ESVO2: Direct Visual-Inertial Odometry with Stereo Event Cameras
Authors: Junkai Niu, Sheng Zhong, Xiuyuan Lu, Shaojie Shen, Guillermo Gallego, Yi Zhou
Abstract: Event-based visual odometry is a specific branch of visual Simultaneous Localization and Mapping (SLAM) techniques, which aims at solving the tracking and mapping sub-problems in parallel by exploiting the special working principles of neuromorphic (i.e., event-based) cameras.
Due to the motion-dependent nature of event data, explicit data association, i.e., feature matching, under large-baseline viewpoint changes is hardly established, making direct methods a more rational choice. However, state-of-the-art direct methods are limited by the high computational complexity of the mapping sub-problem and the degeneracy of camera pose tracking in certain rotational degrees of freedom (DoF). In this paper, we resolve these issues by building an event-based stereo visual-inertial odometry system on top of our previous direct pipeline, Event-based Stereo Visual Odometry. Specifically, to speed up the mapping operation, we propose an efficient strategy for sampling contour points according to the local dynamics of events. The mapping performance is also improved in terms of structure completeness and local smoothness by merging the temporal stereo and static stereo results. To circumvent the degeneracy of camera pose tracking in recovering the pitch and yaw components of general six-DoF motion, we introduce IMU measurements as motion priors via pre-integration. To this end, a compact back-end is proposed for continuously updating the IMU bias and predicting the linear velocity, enabling accurate motion prediction for camera pose tracking. The resulting system scales well with modern high-resolution event cameras and achieves better global positioning accuracy in large-scale outdoor environments. Extensive evaluations on five publicly available datasets featuring different resolutions and scenarios justify the superior performance of the proposed system against five state-of-the-art methods.
Submitted 12 October, 2024; originally announced October 2024.
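For intuition, even a bias-free Euler integration of IMU samples between two camera timestamps yields a usable motion prior for the tracker; the paper's pre-integration additionally maintains bias estimates. A minimal sketch, with all names illustrative:

    import numpy as np

    def exp_so3(phi):
        # Rodrigues' formula: rotation matrix for rotation vector phi.
        theta = np.linalg.norm(phi)
        if theta < 1e-9:
            return np.eye(3)
        k = phi / theta
        K = np.array([[0, -k[2], k[1]],
                      [k[2], 0, -k[0]],
                      [-k[1], k[0], 0]])
        return np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)

    def propagate(p, v, R, accs, gyrs, dt, g=np.array([0.0, 0.0, -9.81])):
        # Euler-integrate IMU samples to predict the next camera pose,
        # providing a motion prior for pose tracking.
        for a, w in zip(accs, gyrs):
            R = R @ exp_so3(w * dt)   # attitude update from the gyroscope
            a_w = R @ a + g           # specific force rotated into the world frame
            p = p + v * dt + 0.5 * a_w * dt ** 2
            v = v + a_w * dt
        return p, v, R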
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06877">arXiv:2410.06877</a> <span> [<a href="https://arxiv.org/pdf/2410.06877">pdf</a>, <a href="https://arxiv.org/ps/2410.06877">ps</a>, <a href="https://arxiv.org/format/2410.06877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Best-of-Both-Worlds Fair Allocation of Indivisible and Mixed Goods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bu%2C+X">Xiaolin Bu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zihao Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shengxin Liu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinhang Lu</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+B">Biaoshuai Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06877v2-abstract-short" style="display: inline;"> We study the problem of fairly allocating either a set of indivisible goods or a set of mixed divisible and indivisible goods (i.e., mixed goods) to agents with additive utilities, taking the best-of-both-worlds perspective of guaranteeing fairness properties both ex ante and ex post. The ex-post fairness notions considered in this paper are relaxations of envy-freeness, specifically, EFX for indi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06877v2-abstract-full').style.display = 'inline'; document.getElementById('2410.06877v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06877v2-abstract-full" style="display: none;"> We study the problem of fairly allocating either a set of indivisible goods or a set of mixed divisible and indivisible goods (i.e., mixed goods) to agents with additive utilities, taking the best-of-both-worlds perspective of guaranteeing fairness properties both ex ante and ex post. The ex-post fairness notions considered in this paper are relaxations of envy-freeness, specifically, EFX for indivisible-goods allocation, and EFM for mixed-goods allocation. For two agents, we show that there is a polynomial-time randomized algorithm that achieves ex-ante envy-freeness and ex-post EFX / EFM simultaneously. For $n$ agents with bi-valued utilities, we show there exist randomized allocations that are (i) ex-ante proportional and ex-post EFM, and (ii) ex-ante envy-free, ex-post EFX, and ex-post fractionally Pareto optimal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06877v2-abstract-full').style.display = 'none'; document.getElementById('2410.06877v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Appears in the 20th Conference on Web and Internet Economics (WINE), 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05584">arXiv:2410.05584</a> <span> [<a href="https://arxiv.org/pdf/2410.05584">pdf</a>, <a href="https://arxiv.org/format/2410.05584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Reward Model Evaluation: Are We Barking up the Wrong Tree? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+X">Xueru Wen</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+J">Jie Lou</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yaojie Lu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongyu Lin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xing Yu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinyu Lu</a>, <a href="/search/cs?searchtype=author&query=He%2C+B">Ben He</a>, <a href="/search/cs?searchtype=author&query=Han%2C+X">Xianpei Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Debing Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Le Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05584v2-abstract-short" style="display: inline;"> Reward Models (RMs) are crucial for aligning language models with human preferences. Currently, the evaluation of RMs depends on measuring accuracy against a validation set of manually annotated preference data. Although this method is straightforward and widely adopted, the relationship between RM accuracy and downstream policy performance remains under-explored. In this work, we conduct experime… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05584v2-abstract-full').style.display = 'inline'; document.getElementById('2410.05584v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05584v2-abstract-full" style="display: none;"> Reward Models (RMs) are crucial for aligning language models with human preferences. Currently, the evaluation of RMs depends on measuring accuracy against a validation set of manually annotated preference data. Although this method is straightforward and widely adopted, the relationship between RM accuracy and downstream policy performance remains under-explored. In this work, we conduct experiments in a synthetic setting to investigate how differences in RM measured by accuracy translate into gaps in optimized policy performance. Our findings reveal that while there is a weak positive correlation between accuracy and downstream performance, policies optimized towards RMs with similar accuracy can exhibit quite different performance. 
Moreover, we discover that the way accuracy is measured significantly impacts its ability to predict final policy performance. Through the lens of the Regressional Goodhart effect, we identify exogenous variables that affect the relationship between RM quality, as measured by accuracy, and policy model capability. This underscores the inadequacy of relying solely on accuracy to reflect an RM's impact on policy optimization.
Submitted 15 October, 2024; v1 submitted 7 October, 2024; originally announced October 2024.

arXiv:2410.04265 [pdf, other] cs.CL
AI as Humanity's Salieri: Quantifying Linguistic Creativity of Language Models via Systematic Attribution of Machine Text against Web Text
Authors: Ximing Lu, Melanie Sclar, Skyler Hallinan, Niloofar Mireshghallah, Jiacheng Liu, Seungju Han, Allyson Ettinger, Liwei Jiang, Khyathi Chandu, Nouha Dziri, Yejin Choi
Abstract: Creativity has long been considered one of the most difficult aspects of human intelligence for AI to mimic. However, the rise of Large Language Models (LLMs), like ChatGPT, has raised questions about whether AI can match or even surpass human creativity.
We present CREATIVITY INDEX as a first step toward quantifying the linguistic creativity of a text by reconstructing it from existing text snippets on the web. CREATIVITY INDEX is motivated by the hypothesis that the seemingly remarkable creativity of LLMs may be attributable in large part to the creativity of human-written texts on the web. To compute CREATIVITY INDEX efficiently, we introduce DJ SEARCH, a novel dynamic programming algorithm that can search for verbatim and near-verbatim matches of text snippets from a given document against the web. Experiments reveal that the CREATIVITY INDEX of professional human authors is on average 66.2% higher than that of LLMs, and that alignment reduces the CREATIVITY INDEX of LLMs by an average of 30.1%. In addition, we find that distinguished authors like Hemingway exhibit measurably higher CREATIVITY INDEX than other human writers. Finally, we demonstrate that CREATIVITY INDEX can be used as a surprisingly effective criterion for zero-shot machine-text detection, surpassing the strongest existing zero-shot system, DetectGPT, by a significant margin of 30.2%, and even outperforming the strongest supervised system, GhostBuster, in five out of six domains.
Submitted 5 October, 2024; originally announced October 2024.
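A greatly simplified stand-in for the matching step: measure how much of a document is covered by verbatim n-gram matches against a reference corpus. Under this reading, the creativity score would be roughly one minus coverage; DJ SEARCH instead uses dynamic programming and also handles near-verbatim matches, so this greedy sketch is illustrative only.

    def ngram_coverage(tokens, corpus_ngrams, n=5):
        # Fraction of token positions covered by an n-gram that occurs
        # verbatim in the reference corpus.
        covered = [False] * len(tokens)
        for i in range(len(tokens) - n + 1):
            if tuple(tokens[i:i + n]) in corpus_ngrams:
                covered[i:i + n] = [True] * n
        return sum(covered) / max(len(tokens), 1)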
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04013">arXiv:2410.04013</a> <span> [<a href="https://arxiv.org/pdf/2410.04013">pdf</a>, <a href="https://arxiv.org/format/2410.04013">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Temporal Link Prediction via Temporal Walk Matrix Projection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaodong Lu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Leilei Sun</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tongyu Zhu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+W">Weifeng Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04013v1-abstract-short" style="display: inline;"> Temporal link prediction, aiming at predicting future interactions among entities based on historical interactions, is crucial for a series of real-world applications. Although previous methods have demonstrated the importance of relative encodings for effective temporal link prediction, computational efficiency remains a major concern in constructing these encodings. Moreover, existing relative e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04013v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04013v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04013v1-abstract-full" style="display: none;"> Temporal link prediction, aiming at predicting future interactions among entities based on historical interactions, is crucial for a series of real-world applications. Although previous methods have demonstrated the importance of relative encodings for effective temporal link prediction, computational efficiency remains a major concern in constructing these encodings. Moreover, existing relative encodings are usually constructed based on structural connectivity, where temporal information is seldom considered. To address the aforementioned issues, we first analyze existing relative encodings and unify them as a function of temporal walk matrices. This unification establishes a connection between relative encodings and temporal walk matrices, providing a more principled way for analyzing and designing relative encodings. Based on this analysis, we propose a new temporal graph neural network called TPNet, which introduces a temporal walk matrix that incorporates the time decay effect to simultaneously consider both temporal and structural information. Moreover, TPNet designs a random feature propagation mechanism with theoretical guarantees to implicitly maintain the temporal walk matrices, which improves the computation and storage efficiency. Experimental results on 13 benchmark datasets verify the effectiveness and efficiency of TPNet, where TPNet outperforms other baselines on most datasets and achieves a maximum speedup of $33.3 \times$ compared to the SOTA baseline. Our code can be found at \url{https://github.com/lxd99/TPNet}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04013v1-abstract-full').style.display = 'none'; document.getElementById('2410.04013v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024 Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01246">arXiv:2410.01246</a> <span> [<a href="https://arxiv.org/pdf/2410.01246">pdf</a>, <a href="https://arxiv.org/format/2410.01246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AHP-Powered LLM Reasoning for Multi-Criteria Evaluation of Open-Ended Responses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaotian Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiyi Li</a>, <a href="/search/cs?searchtype=author&query=Takeuchi%2C+K">Koh Takeuchi</a>, <a href="/search/cs?searchtype=author&query=Kashima%2C+H">Hisashi Kashima</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01246v1-abstract-short" style="display: inline;"> Question answering (QA) tasks have been extensively studied in the field of natural language processing (NLP). Answers to open-ended questions are highly diverse and difficult to quantify, and cannot be simply evaluated as correct or incorrect, unlike close-ended questions with definitive answers. While large language models (LLMs) have demonstrated strong capabilities across various tasks, they e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01246v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01246v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01246v1-abstract-full" style="display: none;"> Question answering (QA) tasks have been extensively studied in the field of natural language processing (NLP). Answers to open-ended questions are highly diverse and difficult to quantify, and cannot be simply evaluated as correct or incorrect, unlike close-ended questions with definitive answers. While large language models (LLMs) have demonstrated strong capabilities across various tasks, they exhibit relatively weaker performance in evaluating answers to open-ended questions. In this study, we propose a method that leverages LLMs and the analytic hierarchy process (AHP) to assess answers to open-ended questions. We utilized LLMs to generate multiple evaluation criteria for a question. Subsequently, answers were subjected to pairwise comparisons under each criterion with LLMs, and scores for each answer were calculated in the AHP. 
We conducted experiments on four datasets using both ChatGPT-3.5-turbo and GPT-4. Our results indicate that our approach aligns more closely with human judgment than the four baselines. Additionally, we explored the impact of the number of criteria, variations in models, and differences in datasets on the results.
Submitted 2 October, 2024; originally announced October 2024.
Comments: Accepted for EMNLP 2024 Findings
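The AHP scoring step itself is standard: each criterion yields a reciprocal pairwise comparison matrix whose principal eigenvector gives the per-answer scores, which are then aggregated across criteria with criterion weights. A minimal sketch using power iteration (names illustrative, not the paper's code):

    import numpy as np

    def ahp_scores(pairwise):
        # pairwise[i][j] > 1 means answer i is preferred to answer j on this
        # criterion; the matrix is reciprocal: pairwise[j][i] == 1 / pairwise[i][j].
        w = np.ones(len(pairwise))
        for _ in range(100):  # power iteration toward the principal eigenvector
            w = pairwise @ w
            w = w / w.sum()
        return w

    # Aggregation across criteria: final[i] = sum_c criterion_weight[c] * scores_c[i]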
arXiv:2410.00667 [pdf] cs.SD eess.AS physics.class-ph, doi:10.1016/j.jenvman.2024.123321
Contribution of soundscape appropriateness to soundscape quality assessment in space: a mediating variable affecting acoustic comfort
Authors: Xinhao Yang, Guangyu Zhang, Xiaodong Lu, Yuan Zhang, Jian Kang
Abstract: Soundscape appropriateness (SA) provides supplemental information on the matching degree between auditory information and the surrounding scene in soundscape perception. This indicator has been integrated into the standard ISO process for collecting soundscape data, forming a component of the sound quality assessment questionnaire. However, its role in soundscape quality assessment has not been fully understood. Herein, we present findings from soundscape data collected in Beiling Park in Shenyang, China. We developed a method that integrates mediation effect models with multiscale geographically weighted regression models to explore the mediating role of SA in the impact of sound source types on soundscape quality, as well as the spatial heterogeneity of this mediation effect. The results confirm that SA mediates the influence of sound source types on acoustic comfort (AC). Specifically, natural sounds (indirect effect / total effect = .19/.19), traffic sounds (indirect effect / total effect = -.46/-.65), and commercial sounds (indirect effect / total effect = -.25/-.12) affect the perception of AC by either enhancing or reducing SA. Moreover, the relationships among the variables in this model exhibit spatial heterogeneity, suggesting that in urban open spaces with complex structures, local spatial models may be needed for soundscape assessment. The research reaffirms the significance of SA in urban open spaces. As a practical implication for urban and landscape planners, when sound sources cannot be controlled or altered, coordinating the sound with the surrounding environment through landscape optimisation could improve soundscape quality by enhancing SA, helping to achieve the goal of creating healthy urban open spaces.
Submitted 19 November, 2024; v1 submitted 1 October, 2024; originally announced October 2024.
Comments: Accepted by Journal of Environmental Management
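The indirect/total-effect decomposition can be illustrated with a global Baron-Kenny-style mediation model; the paper instead localizes these coefficients with multiscale geographically weighted regression, so this sketch (x coding a sound source type, m the SA rating, y the AC rating) is a simplification.

    import numpy as np

    def mediation_effects(x, m, y):
        # Two ordinary-least-squares fits: m = a*x + const; y = b*m + c*x + const.
        a = np.linalg.lstsq(np.column_stack([x, np.ones_like(x)]), m, rcond=None)[0][0]
        b, c = np.linalg.lstsq(np.column_stack([m, x, np.ones_like(x)]), y, rcond=None)[0][:2]
        indirect = a * b      # effect of x on y routed through the mediator
        total = indirect + c  # indirect plus direct effect
        return indirect, c, total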
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Journal of Environmental Management</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19745">arXiv:2409.19745</a> <span> [<a href="https://arxiv.org/pdf/2409.19745">pdf</a>, <a href="https://arxiv.org/format/2409.19745">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PEAR: Position-Embedding-Agnostic Attention Re-weighting Enhances Retrieval-Augmented Generation with Zero Inference Overhead </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+T">Tao Tan</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+Y">Yining Qian</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+A">Ang Lv</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Hongzhan Lin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Songhao Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yongbo Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jingtong Wu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xin Lu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+R">Rui Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19745v2-abstract-short" style="display: inline;"> Large language models (LLMs) enhanced with retrieval-augmented generation (RAG) have introduced a new paradigm for web search. However, the limited context awareness of LLMs degrades their performance on RAG tasks. Existing methods to enhance context awareness are often inefficient, incurring time or memory overhead during inference, and many are tailored to specific position embeddings. In this p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19745v2-abstract-full').style.display = 'inline'; document.getElementById('2409.19745v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19745v2-abstract-full" style="display: none;"> Large language models (LLMs) enhanced with retrieval-augmented generation (RAG) have introduced a new paradigm for web search. However, the limited context awareness of LLMs degrades their performance on RAG tasks. Existing methods to enhance context awareness are often inefficient, incurring time or memory overhead during inference, and many are tailored to specific position embeddings. In this paper, we propose Position-Embedding-Agnostic attention Re-weighting (PEAR), which enhances the context awareness of LLMs with zero inference overhead. Specifically, on a proxy task focused on context copying, we first detect heads which suppress the models' context awareness thereby diminishing RAG performance. To weaken the impact of these heads, we re-weight their outputs with learnable coefficients. 
The LLM (with frozen parameters) is optimized by adjusting these coefficients to minimize loss on the proxy task. As a result, the coefficients are optimized to values less than one, thereby reducing their tendency to suppress RAG performance. During inference, the optimized coefficients are fixed to re-weight these heads, regardless of the specific task at hand. Our proposed PEAR offers two major advantages over previous approaches: (1) It introduces zero additional inference overhead in terms of memory usage or inference time, while outperforming competitive baselines in accuracy and efficiency across various RAG tasks. (2) It is independent of position embedding algorithms, ensuring broader applicability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19745v2-abstract-full').style.display = 'none'; document.getElementById('2409.19745v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18828">arXiv:2409.18828</a> <span> [<a href="https://arxiv.org/pdf/2409.18828">pdf</a>, <a href="https://arxiv.org/format/2409.18828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MECG-E: Mamba-based ECG Enhancer for Baseline Wander Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hung%2C+K">Kuo-Hsuan Hung</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kuan-Chen Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kai-Chun Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei-Lun Chen</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xugang Lu</a>, <a href="/search/cs?searchtype=author&query=Tsao%2C+Y">Yu Tsao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chii-Wann Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18828v2-abstract-short" style="display: inline;"> Electrocardiogram (ECG) is an important non-invasive method for diagnosing cardiovascular disease. However, ECG signals are susceptible to noise contamination, such as electrical interference or signal wandering, which reduces diagnostic accuracy. 
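<p>As a rough illustration of the mechanism described in this abstract, the following PyTorch-style sketch scales the outputs of a chosen set of attention heads by learnable coefficients while everything else stays frozen. The module, shapes, and hook point are assumptions for illustration, not the authors' implementation:</p> <pre><code class="language-python">
import torch
import torch.nn as nn

class HeadReweight(nn.Module):
    """Scale selected attention heads' outputs by learnable coefficients.

    Assumes a hook point where per-head outputs have shape
    (batch, seq, n_heads, head_dim), before the output projection.
    """
    def __init__(self, n_heads, suppressing_heads):
        super().__init__()
        self.coef = nn.Parameter(torch.ones(n_heads))  # start as identity
        mask = torch.zeros(n_heads, dtype=torch.bool)
        mask[suppressing_heads] = True                 # heads flagged on the proxy task
        self.register_buffer("mask", mask)

    def forward(self, head_outputs):
        # Re-weight only the flagged heads; all other heads keep scale 1.
        scale = torch.where(self.mask, self.coef, torch.ones_like(self.coef))
        return head_outputs * scale.view(1, 1, -1, 1)

# Training idea: freeze every LLM weight and optimise only `coef` on a
# context-copying proxy task; at inference the coefficients are constants,
# so no extra time or memory is spent.
</code></pre>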
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18828">arXiv:2409.18828</a> <span> [<a href="https://arxiv.org/pdf/2409.18828">pdf</a>, <a href="https://arxiv.org/format/2409.18828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MECG-E: Mamba-based ECG Enhancer for Baseline Wander Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hung%2C+K">Kuo-Hsuan Hung</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kuan-Chen Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kai-Chun Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei-Lun Chen</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xugang Lu</a>, <a href="/search/cs?searchtype=author&query=Tsao%2C+Y">Yu Tsao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chii-Wann Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.18828v2-abstract-full"> The electrocardiogram (ECG) is an important non-invasive method for diagnosing cardiovascular disease. However, ECG signals are susceptible to noise contamination, such as electrical interference or signal wandering, which reduces diagnostic accuracy. Various ECG denoising methods have been proposed, but most existing methods yield suboptimal performance under very noisy conditions or require several steps during inference, leading to latency during online processing. In this paper, we propose a novel ECG denoising model, namely the Mamba-based ECG Enhancer (MECG-E), which leverages the Mamba architecture, known for its fast inference and outstanding nonlinear mapping capabilities. Experimental results indicate that MECG-E surpasses several well-known existing models across multiple metrics under different noise conditions. Additionally, MECG-E requires less inference time than state-of-the-art diffusion-based ECG denoisers, demonstrating the model's functionality and efficiency. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE BigData 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17907">arXiv:2409.17907</a> <span> [<a href="https://arxiv.org/pdf/2409.17907">pdf</a>, <a href="https://arxiv.org/format/2409.17907">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.14722/ndss.2025.23997">10.14722/ndss.2025.23997</a></span> </div> </div> </div> <p class="title is-5 mathjax"> PhantomLiDAR: Cross-modality Signal Injection Attacks against LiDAR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zizhi Jin</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Q">Qinhong Jiang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xuancun Lu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chen Yan</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+X">Xiaoyu Ji</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Wenyuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.17907v1-abstract-full"> LiDAR (Light Detection and Ranging) is a pivotal sensor for autonomous driving, offering precise 3D spatial information. Previous signal attacks against LiDAR systems mainly exploit laser signals. In this paper, we investigate the possibility of cross-modality signal injection attacks, i.e., injecting intentional electromagnetic interference (IEMI) to manipulate LiDAR output. Our insight is that the internal modules of a LiDAR, i.e., the laser receiving circuit, the monitoring sensors, and the beam-steering modules, even with strict electromagnetic compatibility (EMC) testing, can still couple with the IEMI attack signals and cause the LiDAR system to malfunction. Based on the above attack surfaces, we propose the PhantomLiDAR attack, which manipulates LiDAR output in terms of Points Interference, Points Injection, Points Removal, and even LiDAR Power-Off. We evaluate and demonstrate the effectiveness of PhantomLiDAR with both simulated and real-world experiments on five COTS LiDAR systems. We also conduct feasibility experiments in real-world moving scenarios. We provide potential defense measures that can be implemented at both the sensor level and the vehicle system level to mitigate the risks associated with IEMI attacks. Video demonstrations can be viewed at https://sites.google.com/view/phantomlidar. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17591">arXiv:2409.17591</a> <span> [<a href="https://arxiv.org/pdf/2409.17591">pdf</a>, <a href="https://arxiv.org/format/2409.17591">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conjugate Bayesian Two-step Change Point Detection for Hawkes Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zeyue Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaoling Lu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Feng Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.17591v4-abstract-full"> The Bayesian two-step change point detection method is popular for the Hawkes process due to its simplicity and intuitiveness. However, the non-conjugacy between the point process likelihood and the prior requires most existing Bayesian two-step change point detection methods to rely on non-conjugate inference methods. These methods lack analytical expressions, leading to low computational efficiency and impeding timely change point detection. To address this issue, this work employs data augmentation to propose a conjugate Bayesian two-step change point detection method for the Hawkes process, which proves to be more accurate and efficient. Extensive experiments on both synthetic and real data demonstrate the superior effectiveness and efficiency of our method compared to baseline methods. Additionally, we conduct ablation studies to explore the robustness of our method concerning various hyperparameters. Our code is publicly available at https://github.com/Aurora2050/CoBay-CPD. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, accepted by NeurIPS 2024</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16427">arXiv:2409.16427</a> <span> [<a href="https://arxiv.org/pdf/2409.16427">pdf</a>, <a href="https://arxiv.org/format/2409.16427">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HAICOSYSTEM: An Ecosystem for Sandboxing Safety Risks in Human-AI Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xuhui Zhou</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+H">Hyunwoo Kim</a>, <a href="/search/cs?searchtype=author&query=Brahman%2C+F">Faeze Brahman</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Liwei Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hao Zhu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Ximing Lu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Frank Xu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+B+Y">Bill Yuchen Lin</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&query=Mireshghallah%2C+N">Niloofar Mireshghallah</a>, <a href="/search/cs?searchtype=author&query=Bras%2C+R+L">Ronan Le Bras</a>, <a href="/search/cs?searchtype=author&query=Sap%2C+M">Maarten Sap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.16427v3-abstract-full"> AI agents are increasingly autonomous in their interactions with human users and tools, leading to increased interactional safety risks. We present HAICOSYSTEM, a framework examining AI agent safety within diverse and complex social interactions. HAICOSYSTEM features a modular sandbox environment that simulates multi-turn interactions between human users and AI agents, where the AI agents are equipped with a variety of tools (e.g., patient management platforms) to navigate diverse scenarios (e.g., a user attempting to access other patients' profiles). To examine the safety of AI agents in these interactions, we develop a comprehensive multi-dimensional evaluation framework that uses metrics covering operational, content-related, societal, and legal risks. Through running 1840 simulations based on 92 scenarios across seven domains (e.g., healthcare, finance, education), we demonstrate that HAICOSYSTEM can emulate realistic user-AI interactions and complex tool use by AI agents. Our experiments show that state-of-the-art LLMs, both proprietary and open-source, exhibit safety risks in over 50% of cases, with models generally showing higher risks when interacting with simulated malicious users. Our findings highlight the ongoing challenge of building agents that can safely navigate complex interactions, particularly when faced with malicious users. To foster the AI agent safety ecosystem, we release a code platform that allows practitioners to create custom scenarios, simulate interactions, and evaluate the safety and performance of their agents. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Both the second and third authors contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14827">arXiv:2409.14827</a> <span> [<a href="https://arxiv.org/pdf/2409.14827">pdf</a>, <a href="https://arxiv.org/format/2409.14827">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> AIM 2024 Challenge on Video Saliency Prediction: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moskalenko%2C+A">Andrey Moskalenko</a>, <a href="/search/cs?searchtype=author&query=Bryncev%2C+A">Alexey Bryncev</a>, <a href="/search/cs?searchtype=author&query=Vatolin%2C+D">Dmitry Vatolin</a>, <a href="/search/cs?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+G">Gen Zhan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+L">Li Yang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yunlong Tang</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yiting Liao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jiongzhi Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baitao Huang</a>, <a href="/search/cs?searchtype=author&query=Moradi%2C+M">Morteza Moradi</a>, <a href="/search/cs?searchtype=author&query=Moradi%2C+M">Mohammad Moradi</a>, <a href="/search/cs?searchtype=author&query=Rundo%2C+F">Francesco Rundo</a>, <a href="/search/cs?searchtype=author&query=Spampinato%2C+C">Concetto Spampinato</a>, <a href="/search/cs?searchtype=author&query=Borji%2C+A">Ali Borji</a>, <a href="/search/cs?searchtype=author&query=Palazzo%2C+S">Simone Palazzo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuxin Zhu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yinan Sun</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Huiyu Duan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuqin Cao</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+Z">Ziheng Jia</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Q">Qiang Hu</a>, <a href="/search/cs?searchtype=author&query=Min%2C+X">Xiongkuo Min</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+H">Hao Fang</a> , et al. (8 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.14827v1-abstract-full"> This paper reviews the Challenge on Video Saliency Prediction at AIM 2024. The goal of the participants was to develop a method for predicting accurate saliency maps for the provided set of video sequences. Saliency maps are widely exploited in various applications, including video compression, quality assessment, visual perception studies, and the advertising industry. For this competition, a previously unused large-scale audio-visual mouse saliency (AViMoS) dataset of 1500 videos with more than 70 observers per video was collected using crowdsourced mouse tracking. The dataset collection methodology has been validated using conventional eye-tracking data and has shown high consistency. Over 30 teams registered for the challenge, and 7 teams submitted results in the final phase. The final-phase solutions were tested and ranked by commonly used quality metrics on a private test subset. The results of this evaluation and the descriptions of the solutions are presented in this report. All data, including the private test subset, is made publicly available on the challenge homepage: https://challenges.videoprocessing.ai/challenges/video-saliency-prediction.html. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCVW 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.6; I.2.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12105">arXiv:2409.12105</a> <span> [<a href="https://arxiv.org/pdf/2409.12105">pdf</a>, <a href="https://arxiv.org/format/2409.12105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FedLF: Adaptive Logit Adjustment and Feature Optimization in Federated Long-Tailed Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiuhua Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peng Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xuefeng Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.12105v1-abstract-full"> Federated learning offers a paradigm for addressing the challenge of preserving privacy in distributed machine learning. However, datasets distributed across clients in the real world are inevitably heterogeneous, and if they could be globally aggregated, they would tend to be long-tailed, which greatly affects model performance. The traditional approach to federated learning primarily addresses the heterogeneity of data among clients, yet it fails to address the phenomenon of class-wise bias in global long-tailed data. This results in the trained model focusing on the head classes while neglecting the equally important tail classes. Consequently, it is essential to develop a methodology that considers classes holistically. To address the above problems, we propose a new method, FedLF, which introduces three modifications in the local training phase: adaptive logit adjustment, continuous class-centred optimization, and feature decorrelation. We compare against seven state-of-the-art methods under varying degrees of data heterogeneity and long-tailed distribution. Extensive experiments on the benchmark datasets CIFAR-10-LT and CIFAR-100-LT demonstrate that our approach effectively mitigates model performance degradation due to data heterogeneity and long-tailed distribution. Our code is available at https://github.com/18sym/FedLF. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACML 2024</span> </p> </li>
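<p>For context on the first of FedLF's three modifications: the non-adaptive form of logit adjustment can be written in a few lines. The Python sketch below shows the generic long-tail recipe (shift logits by tau * log(class prior)); FedLF's adaptive variant and its other two components are not reproduced here:</p> <pre><code class="language-python">
import torch
import torch.nn.functional as F

def logit_adjusted_loss(logits, targets, class_counts, tau=1.0):
    """Cross-entropy with logits shifted by tau * log(prior).

    Generic logit-adjustment baseline for long-tailed classification;
    treat this as an illustrative simplification, not FedLF itself.
    """
    prior = class_counts / class_counts.sum()
    adjusted = logits + tau * torch.log(prior + 1e-12)  # penalise head classes
    return F.cross_entropy(adjusted, targets)

# Example: 3 classes with a long-tailed count vector.
counts = torch.tensor([900.0, 90.0, 10.0])
logits = torch.randn(8, 3)
targets = torch.randint(0, 3, (8,))
loss = logit_adjusted_loss(logits, targets, counts)
</code></pre>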
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11724">arXiv:2409.11724</a> <span> [<a href="https://arxiv.org/pdf/2409.11724">pdf</a>, <a href="https://arxiv.org/format/2409.11724">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TART: An Open-Source Tool-Augmented Framework for Explainable Table-based Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinyuan Lu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+L">Liangming Pan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yubo Ma</a>, <a href="/search/cs?searchtype=author&query=Nakov%2C+P">Preslav Nakov</a>, <a href="/search/cs?searchtype=author&query=Kan%2C+M">Min-Yen Kan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.11724v2-abstract-full"> Current Large Language Models (LLMs) exhibit limited ability to understand table structures and to apply precise numerical reasoning, which is crucial for tasks such as table question answering (TQA) and table-based fact verification (TFV). To address these challenges, we introduce our Tool-Augmented Reasoning framework for Tables (TART), which integrates LLMs with specialized tools. TART contains three key components: a table formatter to ensure accurate data representation, a tool maker to develop specific computational tools, and an explanation generator to maintain explainability. We also present the TOOLTAB dataset, a new benchmark designed specifically for training LLMs in table-tool integration. Our experiments indicate that TART achieves substantial improvements over existing methods (e.g., Chain-of-Thought) by improving both the precision of data processing and the clarity of the reasoning process. Notably, TART paired with CodeLlama achieves 90.0% of the accuracy of the closed-source LLM GPT-3.5-turbo, highlighting its robustness in diverse real-world scenarios. All the code and data are available at https://github.com/XinyuanLu00/TART. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10644">arXiv:2409.10644</a> <span> [<a href="https://arxiv.org/pdf/2409.10644">pdf</a>, <a href="https://arxiv.org/format/2409.10644">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improving Multi-candidate Speculative Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaofan Lu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yixiao Zeng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+F">Feiyang Ma</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zixu Yu</a>, <a href="/search/cs?searchtype=author&query=Levorato%2C+M">Marco Levorato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.10644v2-abstract-full"> Speculative Decoding (SD) is a technique to accelerate the inference of Large Language Models (LLMs) by using a lower-complexity draft model to propose candidate tokens that are verified by a larger target model. To further improve efficiency, Multi-Candidate Speculative Decoding (MCSD) improves upon this by sampling multiple candidate tokens from the draft model at each step and verifying them in parallel, thus increasing the chances of accepting a token and reducing generation time. Existing MCSD methods rely on the draft model to initialize the multi-candidate sequences and use a static draft length and tree attention structure. However, such an approach suffers from the output distribution differences between the draft and target models, especially in a dynamic generation context. In this work, we introduce a new version of MCSD that includes target-model-initialized multi-candidate generation, a dynamic sliced topology-aware causal mask for dynamic length adjustment, and decision models to optimize early stopping. We experimented with our method on Llama 2-7B and its variants and observed a maximum 27.5% speedup compared to our MCSD baseline across three benchmarks, with Llama 2-7B as the target model and JackFram 68M as the draft model. Additionally, we evaluate the effects of using the target-model-initialized multi-candidate process with different draft models on output quality. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS ENLSP 2024 Workshop</span> </p> </li>
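<p>To make the draft-and-verify loop underlying SD and MCSD concrete, here is a simplified greedy, single-candidate speculative decoding step in Python. `draft` and `target` are hypothetical stand-ins for the two models (callables mapping token ids of shape (1, seq) to logits of shape (1, seq, vocab)); MCSD's multi-candidate tree verification is deliberately omitted:</p> <pre><code class="language-python">
import torch

@torch.no_grad()
def speculative_step(draft, target, prefix, k=4):
    """One greedy draft-and-verify step (single-candidate sketch)."""
    # 1) Draft k tokens autoregressively with the cheap model.
    seq = prefix.clone()
    for _ in range(k):
        nxt = draft(seq)[:, -1].argmax(-1, keepdim=True)
        seq = torch.cat([seq, nxt], dim=-1)

    # 2) Verify all k drafted tokens with ONE target forward pass.
    tgt_pred = target(seq).argmax(-1)               # target's choice per position
    drafted = seq[:, prefix.shape[1]:]              # the k drafted tokens
    # Prediction for drafted position i comes from the logits one step earlier.
    agree = (tgt_pred[:, prefix.shape[1] - 1:-1] == drafted).int().cumprod(-1)
    n_accept = int(agree.sum())                     # longest agreeing prefix

    # 3) Keep accepted tokens; in full SD, the target's own token is then
    #    appended at the first mismatch, so each step always makes progress.
    return seq[:, : prefix.shape[1] + n_accept]
</code></pre>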
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09396">arXiv:2409.09396</a> <span> [<a href="https://arxiv.org/pdf/2409.09396">pdf</a>, <a href="https://arxiv.org/format/2409.09396">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Channel Adaptation for Speaker Verification Using Optimal Transport with Pseudo Label </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wenhao Yang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jianguo Wei</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Wenhuan Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lei Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xugang Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.09396v1-abstract-full"> Domain gap often degrades the performance of speaker verification (SV) systems when the statistical distributions of training data and real-world test speech are mismatched. Channel variation, a primary factor causing this gap, is less addressed than other issues (e.g., noise). Although various domain adaptation algorithms could be applied to handle this domain gap problem, most cannot capture the complex distribution structure during domain alignment with discriminative learning. In this paper, we propose a novel unsupervised domain adaptation method, i.e., Joint Partial Optimal Transport with Pseudo Label (JPOT-PL), to alleviate the channel mismatch problem. Leveraging the geometry-aware distance metric of optimal transport in distribution alignment, we further design pseudo-label-based discriminative learning, where the pseudo label can be regarded as a new type of soft speaker label derived from the optimal coupling. With JPOT-PL, we carry out experiments on the SV channel adaptation task with VoxCeleb as the basis corpus. Experiments show our method reduces EER by over 10% compared with several state-of-the-art channel adaptation algorithms. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09389">arXiv:2409.09389</a> <span> [<a href="https://arxiv.org/pdf/2409.09389">pdf</a>, <a href="https://arxiv.org/format/2409.09389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Integrated Multi-Level Knowledge Distillation for Enhanced Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wenhao Yang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jianguo Wei</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Wenhuan Lu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xugang Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.09389v1-abstract-full"> Knowledge distillation (KD) is widely used in audio tasks, such as speaker verification (SV), by transferring knowledge from a well-trained large model (the teacher) to a smaller, more compact model (the student) for efficiency and portability. Existing KD methods for SV often mirror those used in image processing, focusing on approximating predicted probabilities and hidden representations. However, these methods fail to account for the multi-level temporal properties of speech audio. In this paper, we propose a novel KD method, i.e., Integrated Multi-level Knowledge Distillation (IML-KD), to transfer knowledge of various temporal-scale features of speech from a teacher model to a student model. In IML-KD, temporal context information from the teacher model is integrated into novel Integrated Gradient-based input-sensitive representations from speech segments with various durations, and the student model is trained to infer these representations with multi-level alignment for the output. We conduct SV experiments on the VoxCeleb1 dataset to evaluate the proposed method. Experimental results demonstrate that IML-KD significantly enhances KD performance, reducing the Equal Error Rate (EER) by 5%. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, submitted to ICASSP 2025</span> </p> </li>
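<p>A generic flavour of "multi-level temporal alignment" can be sketched as a distillation loss that matches student and teacher frame features after pooling at several time scales. This is an illustrative simplification and does not reproduce IML-KD's Integrated-Gradient-based representations; it assumes the channel dimensions already match (e.g., via a projection layer):</p> <pre><code class="language-python">
import torch
import torch.nn.functional as F

def multilevel_kd_loss(student_feats, teacher_feats, scales=(1, 2, 4)):
    """Align student and teacher features at several temporal scales.

    student_feats, teacher_feats: tensors of shape (batch, channels, frames).
    Each scale average-pools over windows of that size before matching,
    so coarser scales emphasise longer-duration context.
    """
    loss = 0.0
    for s in scales:
        st = F.avg_pool1d(student_feats, kernel_size=s, stride=s)
        te = F.avg_pool1d(teacher_feats, kernel_size=s, stride=s)
        loss = loss + F.mse_loss(st, te)
    return loss / len(scales)
</code></pre>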
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09389v1-abstract-full').style.display = 'none'; document.getElementById('2409.09389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08356">arXiv:2409.08356</a> <span> [<a href="https://arxiv.org/pdf/2409.08356">pdf</a>, <a href="https://arxiv.org/format/2409.08356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Finance">q-fin.MF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> COMEX Copper Futures Volatility Forecasting: Econometric Models and Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zian Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinyi Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08356v1-abstract-short" style="display: inline;"> This paper investigates the forecasting performance of COMEX copper futures realized volatility across various high-frequency intervals using both econometric volatility models and deep learning recurrent neural network models. The econometric models considered are GARCH and HAR, while the deep learning models include RNN (Recurrent Neural Network), LSTM (Long Short-Term Memory), and GRU (Gated Re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08356v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08356v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08356v1-abstract-full" style="display: none;"> This paper investigates the forecasting performance of COMEX copper futures realized volatility across various high-frequency intervals using both econometric volatility models and deep learning recurrent neural network models. The econometric models considered are GARCH and HAR, while the deep learning models include RNN (Recurrent Neural Network), LSTM (Long Short-Term Memory), and GRU (Gated Recurrent Unit). In forecasting daily realized volatility for COMEX copper futures with a rolling window approach, the econometric models, particularly HAR, outperform recurrent neural networks overall, with HAR achieving the lowest QLIKE loss function value. However, when the data is replaced with hourly high-frequency realized volatility, the deep learning models outperform the GARCH model, and HAR attains a comparable QLIKE loss function value. Despite the black-box nature of machine learning models, the deep learning models demonstrate superior forecasting performance, surpassing the fixed QLIKE value of HAR in the experiment. 
Moreover, as the forecast horizon extends for daily realized volatility, deep learning models gradually close the performance gap with the GARCH model in certain loss function metrics. Nonetheless, HAR remains the most effective model overall for daily realized volatility forecasting in copper futures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08356v1-abstract-full').style.display = 'none'; document.getElementById('2409.08356v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07931">arXiv:2409.07931</a> <span> [<a href="https://arxiv.org/pdf/2409.07931">pdf</a>, <a href="https://arxiv.org/format/2409.07931">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Task-Augmented Cross-View Imputation Network for Partial Multi-View Incomplete Multi-Label Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaohuan Lu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Lian Zhao</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+W+K">Wai Keung Wong</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+J">Jie Wen</a>, <a href="/search/cs?searchtype=author&query=Long%2C+J">Jiang Long</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+W">Wulin Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07931v1-abstract-short" style="display: inline;"> In real-world scenarios, multi-view multi-label learning often encounters the challenge of incomplete training data due to limitations in data collection and unreliable annotation processes. The absence of multi-view features impairs the comprehensive understanding of samples, omitting crucial details essential for classification. To address this issue, we present a task-augmented cross-view imput… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07931v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07931v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07931v1-abstract-full" style="display: none;"> In real-world scenarios, multi-view multi-label learning often encounters the challenge of incomplete training data due to limitations in data collection and unreliable annotation processes. The absence of multi-view features impairs the comprehensive understanding of samples, omitting crucial details essential for classification. To address this issue, we present a task-augmented cross-view imputation network (TACVI-Net) for the purpose of handling partial multi-view incomplete multi-label classification. Specifically, we employ a two-stage network to derive highly task-relevant features to recover the missing views. 
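<p>The HAR model referenced here is, in its standard form (Corsi, 2009), a plain linear regression of next-day realized volatility on daily, weekly, and monthly RV averages. A self-contained sketch on toy data, not the paper's code or series:</p> <pre><code class="language-python">
import numpy as np
import statsmodels.api as sm

def har_features(rv):
    """Daily, weekly, monthly realized-volatility regressors for HAR-RV.

    rv: 1-D array of daily realized volatility. Specification:
    RV_{t+1} = b0 + bd*RV_t + bw*mean(RV_{t-4..t}) + bm*mean(RV_{t-21..t}) + noise.
    """
    d = rv[21:]
    w = np.array([rv[i - 4:i + 1].mean() for i in range(21, len(rv))])
    m = np.array([rv[i - 21:i + 1].mean() for i in range(21, len(rv))])
    return np.column_stack([d, w, m])

rv = np.abs(np.random.default_rng(1).normal(size=400)) + 0.1  # toy RV series
feats = har_features(rv)
X = sm.add_constant(feats[:-1])   # regressors observed at day t
y = feats[1:, 0]                  # daily RV at day t+1
fit = sm.OLS(y, X).fit()
print(fit.params)                 # b0, daily, weekly, monthly coefficients
</code></pre>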
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07931">arXiv:2409.07931</a> <span> [<a href="https://arxiv.org/pdf/2409.07931">pdf</a>, <a href="https://arxiv.org/format/2409.07931">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Task-Augmented Cross-View Imputation Network for Partial Multi-View Incomplete Multi-Label Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaohuan Lu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Lian Zhao</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+W+K">Wai Keung Wong</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+J">Jie Wen</a>, <a href="/search/cs?searchtype=author&query=Long%2C+J">Jiang Long</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+W">Wulin Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.07931v1-abstract-full"> In real-world scenarios, multi-view multi-label learning often encounters the challenge of incomplete training data due to limitations in data collection and unreliable annotation processes. The absence of multi-view features impairs the comprehensive understanding of samples, omitting crucial details essential for classification. To address this issue, we present a task-augmented cross-view imputation network (TACVI-Net) for handling partial multi-view incomplete multi-label classification. Specifically, we employ a two-stage network to derive highly task-relevant features to recover the missing views. In the first stage, we leverage the information bottleneck theory to obtain a discriminative representation of each view by extracting task-relevant information through a view-specific encoder-classifier architecture. In the second stage, an autoencoder-based multi-view reconstruction network is utilized to extract high-level semantic representations of the augmented features and recover the missing data, thereby aiding the final classification task. Extensive experiments on five datasets demonstrate that our TACVI-Net outperforms other state-of-the-art methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07839">arXiv:2409.07839</a> <span> [<a href="https://arxiv.org/pdf/2409.07839">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FPMT: Enhanced Semi-Supervised Model for Traffic Incident Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xinying Lu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+J">Jianli Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.07839v1-abstract-full"> For traffic incident detection, the acquisition of data and labels is notably resource-intensive, rendering semi-supervised traffic incident detection both a formidable and consequential challenge. Thus, this paper approaches traffic incident detection in a semi-supervised manner. It proposes a semi-supervised learning model named FPMT within the framework of MixText. The data augmentation module introduces Generative Adversarial Networks to balance and expand the dataset. During the mix-up process in the hidden space, it employs a probabilistic pseudo-mixing mechanism to enhance regularization and elevate model precision. In terms of training strategy, it initiates with unsupervised training on all data, followed by supervised fine-tuning on a subset of labeled data, completing the semi-supervised training procedure. Through empirical validation on four real-world datasets, our FPMT model exhibits outstanding performance across various metrics. Particularly noteworthy is its robust performance even in scenarios with low label rates. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 3 figures, accepted by ICPR 2024</span> </p> </li>
arXiv:2409.05847 (https://arxiv.org/abs/2409.05847) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
LSVOS Challenge Report: Large-scale Complex and Long Video Object Segmentation
Authors: Henghui Ding, Lingyi Hong, Chang Liu, Ning Xu, Linjie Yang, Yuchen Fan, Deshui Miao, Yameng Gu, Xin Li, Zhenyu He, Yaowei Wang, Ming-Hsuan Yang, Jinming Chai, Qin Ma, Junpei Zhang, Licheng Jiao, Fang Liu, Xinyu Liu, Jing Zhang, Kexin Zhang, Xu Liu, LingLing Li, Hao Fang, Feiyu Pan, Xiankai Lu, et al. (8 additional authors not shown)
Abstract: Despite the promising performance of current video segmentation models on existing benchmarks, these models still struggle with complex scenes. In this paper, we introduce the 6th Large-scale Video Object Segmentation (LSVOS) challenge, held in conjunction with the ECCV 2024 workshop. This year's challenge includes two tasks: Video Object Segmentation (VOS) and Referring Video Object Segmentation (RVOS). This year, we replace the classic YouTube-VOS and YouTube-RVOS benchmarks with the latest datasets MOSE, LVOS, and MeViS to assess VOS under more challenging, complex environments. The challenge attracted 129 registered teams from more than 20 institutes across over 8 countries. This report includes an introduction to the challenge and datasets, and the methods used by the top 7 teams in the two tracks. More details can be found on our homepage: https://lsvos.github.io/.
Submitted 9 September, 2024; originally announced September 2024.
Comments: ECCV 2024 LSVOS Challenge Report: https://lsvos.github.io/
arXiv:2409.03087 (https://arxiv.org/abs/2409.03087) [pdf, other] eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
Coupling AI and Citizen Science in Creation of Enhanced Training Dataset for Medical Image Segmentation
Authors: Amir Syahmi, Xiangrong Lu, Yinxuan Li, Haoxuan Yao, Hanjun Jiang, Ishita Acharya, Shiyi Wang, Yang Nan, Xiaodan Xing, Guang Yang
Abstract: Recent advancements in medical imaging and artificial intelligence (AI) have greatly enhanced diagnostic capabilities, but the development of effective deep learning (DL) models is still constrained by the lack of high-quality annotated datasets. The traditional manual annotation process by medical experts is time- and resource-intensive, limiting the scalability of these datasets. In this work, we introduce a robust and versatile framework that combines AI and crowdsourcing to improve both the quality and quantity of medical image datasets across different modalities. Our approach utilises a user-friendly online platform that enables a diverse group of crowd annotators to label medical images efficiently. By integrating the MedSAM segmentation AI with this platform, we accelerate the annotation process while maintaining expert-level quality through an algorithm that merges crowd-labelled images. Additionally, we employ pix2pixGAN, a generative AI model, to expand the training dataset with synthetic images that capture realistic morphological features. These methods are combined into a cohesive framework designed to produce an enhanced dataset, which can serve as a universal pre-processing pipeline to boost the training of any medical deep learning segmentation model. Our results demonstrate that this framework significantly improves model performance, especially when training data is limited.
Submitted 4 September, 2024; originally announced September 2024.
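The abstract mentions an algorithm that merges crowd-labelled images into an expert-level mask but does not describe it. A per-pixel majority vote is the simplest such merger and is shown here purely as an assumed stand-in, not the paper's method.

    import numpy as np

    def merge_crowd_masks(masks, threshold=0.5):
        # masks: list of binary (H, W) arrays from different crowd annotators.
        # A pixel survives if at least `threshold` of the annotators marked it.
        stack = np.stack(masks).astype(float)      # (n_annotators, H, W)
        return (stack.mean(axis=0) >= threshold).astype(np.uint8)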
arXiv:2409.02239 (https://arxiv.org/abs/2409.02239) [pdf, other] cs.SD (Sound); cs.AI (Artificial Intelligence); cs.CL (Computation and Language); eess.AS (Audio and Speech Processing)
Temporal Order Preserved Optimal Transport-based Cross-modal Knowledge Transfer Learning for ASR
Authors: Xugang Lu, Peng Shen, Yu Tsao, Hisashi Kawai
Abstract: Transferring linguistic knowledge from a pretrained language model (PLM) to an acoustic model has been shown to greatly improve the performance of automatic speech recognition (ASR). However, due to the heterogeneous feature distributions across modalities, designing an effective model for feature alignment and knowledge transfer between linguistic and acoustic sequences remains a challenging task. Optimal transport (OT), which efficiently measures probability distribution discrepancies, holds great potential for aligning and transferring knowledge between acoustic and linguistic modalities. Nonetheless, the original OT treats acoustic and linguistic feature sequences as two unordered sets during alignment and neglects temporal order information when estimating the OT coupling. Consequently, a time-consuming pretraining stage is required to learn a good alignment between the acoustic and linguistic representations. In this paper, we propose a Temporal Order Preserved OT (TOT)-based Cross-modal Alignment and Knowledge Transfer (CAKT) framework (TOT-CAKT) for ASR. In TOT-CAKT, local neighboring frames of acoustic sequences are smoothly mapped to neighboring regions of linguistic sequences, preserving their temporal order relationship during feature alignment and matching. With this framework, we conduct Mandarin ASR experiments using a pretrained Chinese PLM for linguistic knowledge transfer. Our results demonstrate that the proposed TOT-CAKT significantly improves ASR performance compared to several state-of-the-art models employing linguistic knowledge transfer, and addresses the weaknesses of the original OT-based method in sequential feature alignment for ASR.
Submitted 5 September, 2024; v1 submitted 3 September, 2024; originally announced September 2024.
Comments: Accepted to IEEE SLT 2024
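The key claim is that vanilla OT treats the two sequences as unordered sets. One standard way to encode an order prior, assumed here for illustration rather than taken from the paper, is to add a penalty that grows as a coupling strays from the time-scaled diagonal, then solve the entropy-regularized problem with Sinkhorn iterations:

    import numpy as np

    def temporal_cost(F_ac, F_tx, gamma=1.0):
        # Pairwise feature distance plus a diagonal prior: couplings between
        # positions at very different relative times become expensive.
        n, m = len(F_ac), len(F_tx)
        feat = np.linalg.norm(F_ac[:, None, :] - F_tx[None, :, :], axis=-1)
        i = np.arange(n)[:, None] / max(n - 1, 1)
        j = np.arange(m)[None, :] / max(m - 1, 1)
        return feat + gamma * (i - j) ** 2

    def sinkhorn(C, reg=0.1, n_iter=200):
        # Entropy-regularized OT with uniform marginals; returns the coupling.
        n, m = C.shape
        a, b = np.ones(n) / n, np.ones(m) / m
        K = np.exp(-C / reg)
        u, v = np.ones(n), np.ones(m)
        for _ in range(n_iter):
            u = a / (K @ v)
            v = b / (K.T @ u)
        return u[:, None] * K * v[None, :]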
arXiv:2409.00991 (https://arxiv.org/abs/2409.00991) [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
3D Priors-Guided Diffusion for Blind Face Restoration
Authors: Xiaobin Lu, Xiaobin Hu, Jun Luo, Ben Zhu, Yaping Ruan, Wenqi Ren
Abstract: Blind face restoration endeavors to restore a clear face image from a degraded counterpart. Recent approaches employing Generative Adversarial Networks (GANs) as priors have demonstrated remarkable success in this field. However, these methods encounter challenges in balancing realism and fidelity, particularly in complex degradation scenarios. To inherit the diffusion model's exceptional realism while remaining constrained by identity-aware fidelity, we propose a novel diffusion-based framework that embeds 3D facial priors as structure and identity constraints into the denoising diffusion process. Specifically, to obtain more accurate 3D prior representations, a 3D facial image is reconstructed by a 3D Morphable Model (3DMM) from an initial restored face image produced by a pretrained restoration network. A customized multi-level feature extraction method exploits both the structural and identity information of the 3D facial images, which is then mapped into the noise estimation process. To enhance the fusion of identity information into the noise estimation, we propose a Time-Aware Fusion Block (TAFB). This module offers a more efficient and adaptive fusion of weights for denoising, reflecting the dynamic nature of the denoising process in the diffusion model, which involves initial structure refinement followed by texture detail enhancement. Extensive experiments demonstrate that our network performs favorably against state-of-the-art algorithms on synthetic and real-world datasets for blind face restoration. The code is released on our project page: https://github.com/838143396/3Diffusion.
Submitted 12 September, 2024; v1 submitted 2 September, 2024; originally announced September 2024.
Comments: This paper was accepted by ACM MM 2024; the project page is accessible at https://github.com/838143396/3Diffusion
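The abstract says only that the TAFB adapts fusion weights to the stage of denoising. A minimal reading of that idea, assumed rather than taken from the paper, is a gate conditioned on the timestep embedding:

    import torch
    import torch.nn as nn

    class TimeAwareFusion(nn.Module):
        # Blend denoiser features with 3D-prior features through a gate that
        # depends on the diffusion timestep embedding, so early steps can
        # lean on structure and later steps on texture detail.
        def __init__(self, dim, t_dim):
            super().__init__()
            self.gate = nn.Sequential(nn.Linear(t_dim, dim), nn.Sigmoid())

        def forward(self, feat, prior, t_emb):
            g = self.gate(t_emb)            # (batch, dim), entries in (0, 1)
            return g * feat + (1.0 - g) * prior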
arXiv:2409.00353 (https://arxiv.org/abs/2409.00353) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
RI-MAE: Rotation-Invariant Masked AutoEncoders for Self-Supervised Point Cloud Representation Learning
Authors: Kunming Su, Qiuxia Wu, Panpan Cai, Xiaogang Zhu, Xuequan Lu, Zhiyong Wang, Kun Hu
Abstract: Masked point modeling methods have recently achieved great success in self-supervised learning for point cloud data. However, these methods are sensitive to rotations and often exhibit sharp performance drops when encountering rotational variations. In this paper, we propose a novel Rotation-Invariant Masked AutoEncoder (RI-MAE) to address two major challenges: 1) achieving rotation-invariant latent representations, and 2) facilitating self-supervised reconstruction in a rotation-invariant manner. For the first challenge, we introduce RI-Transformer, which features disentangled geometry content together with rotation-invariant relative orientation and position embedding mechanisms for constructing a rotation-invariant point cloud latent space. For the second challenge, a novel dual-branch student-teacher architecture is devised. It enables self-supervised learning via the reconstruction of masked patches within the learned rotation-invariant latent space. Each branch is based on an RI-Transformer, and they are connected by an additional RI-Transformer predictor. The teacher encodes all point patches, while the student encodes only the unmasked ones. Finally, the predictor predicts the latent features of the masked patches from the student's output latent embeddings, supervised by the teacher's outputs. Extensive experiments demonstrate that our method is robust to rotations, achieving state-of-the-art performance on various downstream tasks.
Submitted 31 August, 2024; originally announced September 2024.
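The abstract fixes the dual-branch layout (the teacher sees all patches, the student only unmasked ones, a predictor regresses the masked latents) but not how the teacher is maintained. In comparable student-teacher schemes the teacher is an exponential moving average of the student, which is the assumption in this sketch:

    import torch

    @torch.no_grad()
    def ema_update(teacher, student, momentum=0.996):
        # teacher_param <- m * teacher_param + (1 - m) * student_param;
        # the teacher is never updated by gradients directly.
        for pt, ps in zip(teacher.parameters(), student.parameters()):
            pt.mul_(momentum).add_(ps, alpha=1.0 - momentum)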
arXiv:2409.00338 (https://arxiv.org/abs/2409.00338) [pdf, other] cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.SI (Social and Information Networks)
GSpect: Spectral Filtering for Cross-Scale Graph Classification
Authors: Xiaoyu Zhang, Wenchuan Yang, Jiawei Feng, Bitao Dai, Tianci Bu, Xin Lu
Abstract: Identifying common structures forms the basis of networked-system design and optimization. However, real structures represented by graphs often vary in size, leading to low accuracy for traditional graph classification methods. Such graphs are called cross-scale graphs. To overcome this limitation, we propose GSpect, an advanced spectral graph filtering model for cross-scale graph classification tasks. Compared with other methods, we use graph wavelet neural networks for the model's convolution layer, which aggregates multi-scale messages to generate graph representations. We design a spectral-pooling layer that aggregates nodes to one node, reducing cross-scale graphs to the same size. We collect and construct a cross-scale benchmark dataset, MSG (Multi Scale Graphs). Experiments reveal that, on open datasets, GSpect improves classification accuracy by 1.62% on average, and by a maximum of 3.33% on PROTEINS. On MSG, GSpect improves classification accuracy by 15.55% on average. GSpect fills the gap in cross-scale graph classification studies and has the potential to assist applied research, such as diagnosing brain disease by predicting a brain network's label, or developing new drugs whose molecular structures are learned from counterparts in other systems.
Submitted 30 August, 2024; originally announced September 2024.
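The spectral-pooling idea, reducing graphs of any size to a fixed-size representation, can be illustrated by projecting node features onto a fixed number of low-frequency Laplacian eigenvectors. This is an assumed simplification for exposition, not the paper's exact layer:

    import numpy as np

    def spectral_pool(A, X, k=1):
        # Project node features X (n, d) onto the k lowest-frequency
        # eigenvectors of the graph Laplacian of adjacency A (n, n),
        # yielding a (k, d) summary independent of the graph size n.
        # With k = 1 the whole graph is aggregated to a single vector,
        # mirroring the "aggregate nodes to one node" description.
        L = np.diag(A.sum(axis=1)) - A      # combinatorial Laplacian
        _, vecs = np.linalg.eigh(L)         # eigenvectors, ascending order
        return vecs[:, :k].T @ X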
arXiv:2409.00130 (https://arxiv.org/abs/2409.00130) [pdf] eess.SP (Signal Processing); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Mirror contrastive loss based sliding window transformer for subject-independent motor imagery based EEG signal recognition
Authors: Jing Luo, Qi Mao, Weiwei Shi, Zhenghao Shi, Xiaofan Wang, Xiaofeng Lu, Xinhong Hei
Abstract: While deep learning models have been extensively utilized in motor imagery based EEG signal recognition, they often operate as black boxes. Motivated by neurological findings indicating that the mental imagery of left or right-hand movement induces event-related desynchronization (ERD) in the contralateral sensorimotor area of the brain, we propose a Mirror Contrastive Loss based Sliding Window Transformer (MCL-SWT) to enhance subject-independent motor imagery based EEG signal recognition. Specifically, the proposed mirror contrastive loss enhances sensitivity to the spatial location of ERD by contrasting the original EEG signals with their mirror counterparts: mirror EEG signals generated by interchanging the channels of the left and right hemispheres. Moreover, we introduce a temporal sliding window transformer that computes self-attention scores over high temporal resolution features, improving model performance at manageable computational complexity. We evaluate MCL-SWT on subject-independent motor imagery EEG recognition tasks; it achieved accuracies of 66.48% and 75.62%, surpassing the state-of-the-art (SOTA) model by 2.82% and 2.17%, respectively. Furthermore, ablation experiments confirm the effectiveness of the proposed mirror contrastive loss. A code demo of MCL-SWT is available at https://github.com/roniusLuo/MCL_SWT.
Submitted 29 August, 2024; originally announced September 2024.
Comments: This paper has been accepted by the Fourth International Workshop on Human Brain and Artificial Intelligence, a joint workshop of the 33rd International Joint Conference on Artificial Intelligence, Jeju Island, South Korea, August 3rd to August 9th, 2024
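The mirror construction is fully specified above (swap the left- and right-hemisphere channels), while the precise contrastive objective is not. The loss below simply penalizes similarity between an embedding and its mirrored counterpart, as one plausible reading rather than the authors' exact formulation:

    import torch
    import torch.nn.functional as F

    def mirror_eeg(x, left_idx, right_idx):
        # x: (batch, channels, time). Build the mirror signal by swapping
        # homologous left/right hemisphere channel indices.
        x_m = x.clone()
        x_m[:, left_idx], x_m[:, right_idx] = x[:, right_idx], x[:, left_idx]
        return x_m

    def mirror_contrastive_loss(z, z_mirror, temperature=0.1):
        # Minimizing the cosine similarity between an embedding and its
        # mirror makes the model sensitive to which hemisphere carries ERD.
        z = F.normalize(z, dim=-1)
        z_mirror = F.normalize(z_mirror, dim=-1)
        return ((z * z_mirror).sum(dim=-1) / temperature).mean()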
arXiv:2408.15829 (https://arxiv.org/abs/2408.15829) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
SITransformer: Shared Information-Guided Transformer for Extreme Multimodal Summarization
Authors: Sicheng Liu, Lintao Wang, Xiaogan Zhu, Xuequan Lu, Zhiyong Wang, Kun Hu
Abstract: Extreme Multimodal Summarization with Multimodal Output (XMSMO) has become an attractive summarization approach, integrating various types of information to create extremely concise yet informative summaries for individual modalities. Existing methods overlook the issue that multimodal data often contain topic-irrelevant information, which can mislead the model into producing inaccurate summaries, especially extremely short ones. In this paper, we propose SITransformer, a Shared Information-guided Transformer for extreme multimodal summarization. It has a shared-information-guided pipeline comprising a cross-modal shared information extractor and a cross-modal interaction module. The extractor formulates semantically shared salient information from the different modalities via a novel filtering process consisting of a differentiable top-k selector and a shared-information-guided gating unit. As a result, the common, salient, and relevant contents across modalities are identified. Next, a transformer with cross-modal attention is developed for intra- and inter-modality learning under the shared-information guidance to produce the extreme summary. Comprehensive experiments demonstrate that SITransformer significantly enhances summarization quality for both video and text summaries in XMSMO. Our code will be publicly available at https://github.com/SichengLeoLiu/MMAsia24-XMSMO.
Submitted 28 August, 2024; v1 submitted 28 August, 2024; originally announced August 2024.
Comments: 8 pages, 5 figures, submitted to ACM Multimedia Asia 2024
ACM Class: I.2.10
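A "differentiable top-k selector" usually means a relaxation that is hard in the forward pass but admits gradients in the backward pass. A straight-through version, assumed here as one common realization, looks like this:

    import torch

    def soft_topk_mask(scores, k, temperature=0.1):
        # Hard 0/1 mask over the k largest scores in the forward pass;
        # gradients flow through the softmax relaxation (straight-through).
        soft = torch.softmax(scores / temperature, dim=-1)
        idx = scores.topk(k, dim=-1).indices
        hard = torch.zeros_like(scores).scatter(-1, idx, 1.0)
        return hard + soft - soft.detach()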
arXiv:2408.15666 (https://arxiv.org/abs/2408.15666) [pdf, other] cs.CL (Computation and Language)
StyleRemix: Interpretable Authorship Obfuscation via Distillation and Perturbation of Style Elements
Authors: Jillian Fisher, Skyler Hallinan, Ximing Lu, Mitchell Gordon, Zaid Harchaoui, Yejin Choi
Abstract: Authorship obfuscation, rewriting a text to intentionally obscure the identity of the author, is an important but challenging task. Current methods using large language models (LLMs) lack interpretability and controllability, often ignoring author-specific stylistic features and resulting in less robust performance overall. To address this, we develop StyleRemix, an adaptive and interpretable obfuscation method that perturbs specific, fine-grained style elements of the original input text. StyleRemix uses pre-trained Low Rank Adaptation (LoRA) modules to rewrite an input along specific stylistic axes (e.g., formality and length) while maintaining low computational cost. StyleRemix outperforms state-of-the-art baselines and much larger LLMs across a variety of domains, as assessed by both automatic and human evaluation. Additionally, we release AuthorMix, a large set of 30K high-quality, long-form texts from a diverse set of 14 authors and 4 domains, and DiSC, a parallel corpus of 1,500 texts spanning seven style axes in 16 unique directions.
Submitted 28 August, 2024; originally announced August 2024.
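The one reusable mechanism named in the abstract is LoRA: one small adapter per style axis. A self-contained sketch of a LoRA-wrapped linear layer follows; the rank, alpha, and init scheme are common defaults, not values from the paper.

    import torch
    import torch.nn as nn

    class LoRALinear(nn.Module):
        # A frozen base projection plus a trainable low-rank update B @ A.
        # Only A and B are learned, so one adapter per style axis
        # (formality, length, ...) stays cheap to train and store.
        def __init__(self, base: nn.Linear, rank=8, alpha=16):
            super().__init__()
            self.base = base
            for p in self.base.parameters():
                p.requires_grad = False
            self.A = nn.Parameter(torch.randn(rank, base.in_features) * 0.01)
            self.B = nn.Parameter(torch.zeros(base.out_features, rank))
            self.scale = alpha / rank

        def forward(self, x):
            return self.base(x) + self.scale * (x @ self.A.T @ self.B.T)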
arXiv:2408.14158 (https://arxiv.org/abs/2408.14158) [pdf, other] cs.DC (Distributed, Parallel, and Cluster Computing); cs.AI (Artificial Intelligence)
Fire-Flyer AI-HPC: A Cost-Effective Software-Hardware Co-Design for Deep Learning
Authors: Wei An, Xiao Bi, Guanting Chen, Shanhuang Chen, Chengqi Deng, Honghui Ding, Kai Dong, Qiushi Du, Wenjun Gao, Kang Guan, Jianzhong Guo, Yongqiang Guo, Zhe Fu, Ying He, Panpan Huang, Jiashi Li, Wenfeng Liang, Xiaodong Liu, Xin Liu, Yiyuan Liu, Yuxuan Liu, Shanghao Lu, Xuan Lu, Xiaotao Nie, Tian Pei, et al. (27 additional authors not shown)
Abstract: The rapid progress in Deep Learning (DL) and Large Language Models (LLMs) has exponentially increased demands for computational power and bandwidth. This, combined with the high costs of faster computing chips and interconnects, has significantly inflated High Performance Computing (HPC) construction costs. To address these challenges, we introduce the Fire-Flyer AI-HPC architecture, a synergistic hardware-software co-design framework and its best practices. For DL training, we deployed Fire-Flyer 2 with 10,000 PCIe A100 GPUs, achieving performance approximating that of the DGX-A100 while reducing costs by half and energy consumption by 40%. We specifically engineered HFReduce to accelerate allreduce communication and implemented numerous measures to keep our Computation-Storage Integrated Network congestion-free. Through our software stack, including HaiScale, 3FS, and HAI-Platform, we achieved substantial scalability by overlapping computation and communication. Our system-oriented experience from DL training provides valuable insights to drive future advancements in AI-HPC.
Submitted 31 August, 2024; v1 submitted 26 August, 2024; originally announced August 2024.
Comments: This is the preprint version of the paper accepted for presentation at the 2024 International Conference for High Performance Computing, Networking, Storage, and Analysis (SC'24). © 2024 IEEE. Personal use of this material is permitted. For other uses, permission from IEEE must be obtained. Please refer to IEEE Xplore for the final published version.
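HFReduce itself is not described in enough detail above to reproduce, but the allreduce pattern it accelerates can be shown. The pure-Python simulation below walks through a classic bandwidth-optimal ring allreduce (reduce-scatter, then allgather); it illustrates the communication schedule only and bears no relation to HFReduce's actual implementation.

    def ring_allreduce(data):
        # data[r][c] is chunk c held by rank r; each of the n ranks owns
        # n chunks. Phase 1 (reduce-scatter): after n-1 steps every rank
        # holds the full sum of exactly one chunk. Phase 2 (allgather):
        # the summed chunks circulate so every rank ends with every sum.
        n = len(data)
        for s in range(n - 1):                      # reduce-scatter
            sends = [(r, (r - s) % n, data[r][(r - s) % n])
                     for r in range(n)]
            for r, c, val in sends:
                data[(r + 1) % n][c] += val
        for s in range(n - 1):                      # allgather
            sends = [(r, (r + 1 - s) % n, data[r][(r + 1 - s) % n])
                     for r in range(n)]
            for r, c, val in sends:
                data[(r + 1) % n][c] = val
        return data

    # Example: 3 ranks, 3 chunks each; every rank ends with [12, 15, 18].
    print(ring_allreduce([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))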
arXiv:2408.13574 (https://arxiv.org/abs/2408.13574) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
PointDGMamba: Domain Generalization of Point Cloud Classification via Generalized State Space Model
Authors: Hao Yang, Qianyu Zhou, Haijia Sun, Xiangtai Li, Fengqi Liu, Xuequan Lu, Lizhuang Ma, Shuicheng Yan
Abstract: Domain Generalization (DG) has recently been explored to improve the generalizability of point cloud classification (PCC) models toward unseen domains. Existing approaches often suffer from limited receptive fields or quadratic complexity due to their use of convolutional neural networks or vision Transformers. In this paper, we present the first work that studies the generalizability of state space models (SSMs) in DG PCC, and we find that directly applying SSMs to DG PCC encounters several challenges: the inherent topology of the point cloud tends to be disrupted, leading to noise accumulation during the serialization stage. Besides, the lack of designs for domain-agnostic feature learning and data scanning introduces unanticipated domain-specific information into the 3D sequence data. To this end, we propose a novel framework, PointDGMamba, that generalizes strongly toward unseen domains while enjoying global receptive fields and efficient linear complexity. PointDGMamba consists of three innovative components: Masked Sequence Denoising (MSD), Sequence-wise Cross-domain Feature Aggregation (SCFA), and Dual-level Domain Scanning (DDS). In particular, MSD selectively masks out noised point tokens in the point cloud sequences; SCFA introduces cross-domain, same-class point cloud features to encourage the model to learn more generalized features; and DDS comprises intra-domain and cross-domain scanning to facilitate information exchange between features. In addition, we propose a new and more challenging benchmark, PointDG-3to1, for multi-domain generalization. Extensive experiments demonstrate the effectiveness and state-of-the-art performance of PointDGMamba.
Submitted 24 August, 2024; originally announced August 2024.
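Of the three components, MSD is the most self-explanatory: mask out tokens judged noisy. The abstract does not give the scoring rule, so the sketch below uses distance from the per-sequence centroid as an assumed noise proxy, purely for illustration.

    import torch

    def masked_sequence_denoising(tokens, mask_ratio=0.3):
        # tokens: (batch, seq_len, dim). Score tokens by distance from the
        # per-sequence centroid and zero out the highest-scoring fraction,
        # treating them as noise introduced during serialization.
        center = tokens.mean(dim=1, keepdim=True)
        score = (tokens - center).norm(dim=-1)      # (batch, seq_len)
        k = int(tokens.shape[1] * mask_ratio)
        idx = score.topk(k, dim=1).indices
        keep = torch.ones_like(score).scatter(1, idx, 0.0)
        return tokens * keep.unsqueeze(-1), keep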
arXiv:2408.13473 (https://arxiv.org/abs/2408.13473) [pdf] cs.CL (Computation and Language)
Why Antiwork: A RoBERTa-Based System for Work-Related Stress Identification and Leading Factor Analysis
Authors: Tao Lu, Muzhe Wu, Xinyi Lu, Siyuan Xu, Shuyu Zhan, Anuj Tambwekar, Emily Mower Provost
Abstract: Harsh working environments and work-related stress have been known to contribute to mental health problems such as anxiety, depression, and suicidal ideation. As such, it is paramount to create solutions that can both detect employee unhappiness and find the root cause of the problem. While prior works have examined causes of mental health problems using machine learning, they typically focus on general mental health analysis, with few focusing on explainable solutions or the workplace-specific setting. r/antiwork is a subreddit for the antiwork movement, the desire to stop working altogether. Using this subreddit as a proxy for work-environment dissatisfaction, we create a new dataset for antiwork sentiment detection and subsequently train a model that highlights the words carrying antiwork sentiment. Following this, we perform a qualitative and quantitative analysis to uncover key insights into the mindset of individuals who identify with the antiwork movement and how their working environments influenced them. We find that working environments that give employees no authority or responsibility, frustrating recruiting experiences, and unfair compensation are among the leading causes of antiwork sentiment, resulting in a lack of self-confidence and motivation among employees.
Submitted 24 August, 2024; originally announced August 2024.
Comments: 13 pages, 8 figures
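The detection side of this system reduces to fine-tuning a RoBERTa sequence classifier. The snippet below shows the generic Hugging Face scaffolding for that; "roberta-base" and the two-label setup are assumptions, since the paper's fine-tuned checkpoint and label scheme are not given above.

    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    # Generic RoBERTa classifier scaffold; before fine-tuning on an
    # antiwork dataset the predictions carry no meaning.
    tok = AutoTokenizer.from_pretrained("roberta-base")
    model = AutoModelForSequenceClassification.from_pretrained(
        "roberta-base", num_labels=2)

    batch = tok(["I finally handed in my resignation and it felt like freedom."],
                return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        probs = model(**batch).logits.softmax(dim=-1)
    print(probs)  # [P(label 0), P(label 1)] under the fine-tuned label scheme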