Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 52 results for author: <span class="mathjax">Mo, F</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Mo%2C+F">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Mo, F"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Mo%2C+F&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Mo, F"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Mo%2C+F&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Mo%2C+F&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Mo%2C+F&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14822">arXiv:2502.14822</a> <span> [<a href="https://arxiv.org/pdf/2502.14822">pdf</a>, <a href="https://arxiv.org/format/2502.14822">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Model Architectures in Information Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhichao Xu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhiqi Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Crystina Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P">Puxuan Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bei Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jimmy Lin</a>, <a href="/search/cs?searchtype=author&query=Srikumar%2C+V">Vivek Srikumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14822v1-abstract-short" style="display: inline;"> This survey examines the evolution of model architectures in information retrieval (IR), focusing on two key aspects: backbone models for feature extraction and end-to-end system architectures for relevance estimation. The review intentionally separates architectural considerations from training methodologies to provide a focused analysis of structural innovations in IR systems.We trace the develo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14822v1-abstract-full').style.display = 'inline'; document.getElementById('2502.14822v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14822v1-abstract-full" style="display: none;"> This survey examines the evolution of model architectures in information retrieval (IR), focusing on two key aspects: backbone models for feature extraction and end-to-end system architectures for relevance estimation. The review intentionally separates architectural considerations from training methodologies to provide a focused analysis of structural innovations in IR systems.We trace the development from traditional term-based methods to modern neural approaches, particularly highlighting the impact of transformer-based models and subsequent large language models (LLMs). We conclude by discussing emerging challenges and future directions, including architectural optimizations for performance and scalability, handling of multimodal, multilingual data, and adaptation to novel application domains beyond traditional search paradigms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14822v1-abstract-full').style.display = 'none'; document.getElementById('2502.14822v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02975">arXiv:2502.02975</a> <span> [<a href="https://arxiv.org/pdf/2502.02975">pdf</a>, <a href="https://arxiv.org/format/2502.02975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TGB-Seq Benchmark: Challenging Temporal GNNs with Complex Sequential Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+L">Lu Yi</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jie Peng</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yanping Zheng</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhewei Wei</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yuhang Ye</a>, <a href="/search/cs?searchtype=author&query=Zixuan%2C+Y">Yue Zixuan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zengfeng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02975v1-abstract-short" style="display: inline;"> Future link prediction is a fundamental challenge in various real-world dynamic systems. To address this, numerous temporal graph neural networks (temporal GNNs) and benchmark datasets have been developed. However, these datasets often feature excessive repeated edges and lack complex sequential dynamics, a key characteristic inherent in many real-world applications such as recommender systems and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02975v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02975v1-abstract-full" style="display: none;"> Future link prediction is a fundamental challenge in various real-world dynamic systems. To address this, numerous temporal graph neural networks (temporal GNNs) and benchmark datasets have been developed. However, these datasets often feature excessive repeated edges and lack complex sequential dynamics, a key characteristic inherent in many real-world applications such as recommender systems and ``Who-To-Follow'' on social networks. This oversight has led existing methods to inadvertently downplay the importance of learning sequential dynamics, focusing primarily on predicting repeated edges. In this study, we demonstrate that existing methods, such as GraphMixer and DyGFormer, are inherently incapable of learning simple sequential dynamics, such as ``a user who has followed OpenAI and Anthropic is more likely to follow AI at Meta next.'' Motivated by this issue, we introduce the Temporal Graph Benchmark with Sequential Dynamics (TGB-Seq), a new benchmark carefully curated to minimize repeated edges, challenging models to learn sequential dynamics and generalize to unseen edges. TGB-Seq comprises large real-world datasets spanning diverse domains, including e-commerce interactions, movie ratings, business reviews, social networks, citation networks and web link networks. Benchmarking experiments reveal that current methods usually suffer significant performance degradation and incur substantial training costs on TGB-Seq, posing new challenges and opportunities for future research. TGB-Seq datasets, leaderboards, and example codes are available at https://tgb-seq.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02975v1-abstract-full').style.display = 'none'; document.getElementById('2502.02975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">published at ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17802">arXiv:2501.17802</a> <span> [<a href="https://arxiv.org/pdf/2501.17802">pdf</a>, <a href="https://arxiv.org/format/2501.17802">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LEKA:LLM-Enhanced Knowledge Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinhao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jinghan Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dongjie Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yanjie Fu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kunpeng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17802v1-abstract-short" style="display: inline;"> Humans excel in analogical learning and knowledge transfer and, more importantly, possess a unique understanding of identifying appropriate sources of knowledge. From a model's perspective, this presents an interesting challenge. If models could autonomously retrieve knowledge useful for transfer or decision-making to solve problems, they would transition from passively acquiring to actively acces… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17802v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17802v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17802v1-abstract-full" style="display: none;"> Humans excel in analogical learning and knowledge transfer and, more importantly, possess a unique understanding of identifying appropriate sources of knowledge. From a model's perspective, this presents an interesting challenge. If models could autonomously retrieve knowledge useful for transfer or decision-making to solve problems, they would transition from passively acquiring to actively accessing and learning from knowledge. However, filling models with knowledge is relatively straightforward -- it simply requires more training and accessible knowledge bases. The more complex task is teaching models about which knowledge can be analogized and transferred. Therefore, we design a knowledge augmentation method LEKA for knowledge transfer that actively searches for suitable knowledge sources that can enrich the target domain's knowledge. This LEKA method extracts key information from textual information from the target domain, retrieves pertinent data from external data libraries, and harmonizes retrieved data with the target domain data in feature space and marginal probability measures. We validate the effectiveness of our approach through extensive experiments across various domains and demonstrate significant improvements over traditional methods in reducing computational costs, automating data alignment, and optimizing transfer learning outcomes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17802v1-abstract-full').style.display = 'none'; document.getElementById('2501.17802v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13391">arXiv:2501.13391</a> <span> [<a href="https://arxiv.org/pdf/2501.13391">pdf</a>, <a href="https://arxiv.org/format/2501.13391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Understand Preferences in Personalized Recommendation? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhaoxuan Tan</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Z">Zinan Zeng</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingkai Zeng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhenyu Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zheyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+M">Meng Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13391v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) excel in various tasks, including personalized recommendations. Existing evaluation methods often focus on rating prediction, relying on regression errors between actual and predicted ratings. However, user rating bias and item quality, two influential factors behind rating scores, can obscure personal preferences in user-item pair data. To address this, we introduce P… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13391v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13391v1-abstract-full" style="display: none;"> Large Language Models (LLMs) excel in various tasks, including personalized recommendations. Existing evaluation methods often focus on rating prediction, relying on regression errors between actual and predicted ratings. However, user rating bias and item quality, two influential factors behind rating scores, can obscure personal preferences in user-item pair data. To address this, we introduce PerRecBench, disassociating the evaluation from these two factors and assessing recommendation techniques on capturing the personal preferences in a grouped ranking manner. We find that the LLM-based recommendation techniques that are generally good at rating prediction fail to identify users' favored and disfavored items when the user rating bias and item quality are eliminated by grouping users. With PerRecBench and 19 LLMs, we find that while larger models generally outperform smaller ones, they still struggle with personalized recommendation. Our findings reveal the superiority of pairwise and listwise ranking approaches over pointwise ranking, PerRecBench's low correlation with traditional regression metrics, the importance of user profiles, and the role of pretraining data distributions. We further explore three supervised fine-tuning strategies, finding that merging weights from single-format training is promising but improving LLMs' understanding of user preferences remains an open research problem. Code and data are available at https://github.com/TamSiuhin/PerRecBench <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13391v1-abstract-full').style.display = 'none'; document.getElementById('2501.13391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03085">arXiv:2501.03085</a> <span> [<a href="https://arxiv.org/pdf/2501.03085">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Personalized Fashion Recommendation with Image Attributes and Aesthetics Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chongxian Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xin Fan</a>, <a href="/search/cs?searchtype=author&query=Yamana%2C+H">Hayato Yamana</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03085v1-abstract-short" style="display: inline;"> Personalized fashion recommendation is a difficult task because 1) the decisions are highly correlated with users' aesthetic appetite, which previous work frequently overlooks, and 2) many new items are constantly rolling out that cause strict cold-start problems in the popular identity (ID)-based recommendation methods. These new items are critical to recommend because of trend-driven consumerism… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03085v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03085v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03085v1-abstract-full" style="display: none;"> Personalized fashion recommendation is a difficult task because 1) the decisions are highly correlated with users' aesthetic appetite, which previous work frequently overlooks, and 2) many new items are constantly rolling out that cause strict cold-start problems in the popular identity (ID)-based recommendation methods. These new items are critical to recommend because of trend-driven consumerism. In this work, we aim to provide more accurate personalized fashion recommendations and solve the cold-start problem by converting available information, especially images, into two attribute graphs focusing on optimized image utilization and noise-reducing user modeling. Compared with previous methods that separate image and text as two components, the proposed method combines image and text information to create a richer attributes graph. Capitalizing on the advancement of large language and vision models, we experiment with extracting fine-grained attributes efficiently and as desired using two different prompts. Preliminary experiments on the IQON3000 dataset have shown that the proposed method achieves competitive accuracy compared with baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03085v1-abstract-full').style.display = 'none'; document.getElementById('2501.03085v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16833">arXiv:2412.16833</a> <span> [<a href="https://arxiv.org/pdf/2412.16833">pdf</a>, <a href="https://arxiv.org/format/2412.16833">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> KG4Diagnosis: A Hierarchical Multi-Agent LLM Framework with Knowledge Graph Enhancement for Medical Diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zuo%2C+K">Kaiwen Zuo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yirui Jiang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Lio%2C+P">Pietro Lio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16833v2-abstract-short" style="display: inline;"> Integrating Large Language Models (LLMs) in healthcare diagnosis demands systematic frameworks that can handle complex medical scenarios while maintaining specialized expertise. We present KG4Diagnosis, a novel hierarchical multi-agent framework that combines LLMs with automated knowledge graph construction, encompassing 362 common diseases across medical specialties. Our framework mirrors real-wo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16833v2-abstract-full').style.display = 'inline'; document.getElementById('2412.16833v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16833v2-abstract-full" style="display: none;"> Integrating Large Language Models (LLMs) in healthcare diagnosis demands systematic frameworks that can handle complex medical scenarios while maintaining specialized expertise. We present KG4Diagnosis, a novel hierarchical multi-agent framework that combines LLMs with automated knowledge graph construction, encompassing 362 common diseases across medical specialties. Our framework mirrors real-world medical systems through a two-tier architecture: a general practitioner (GP) agent for initial assessment and triage, coordinating with specialized agents for in-depth diagnosis in specific domains. The core innovation lies in our end-to-end knowledge graph generation methodology, incorporating: (1) semantic-driven entity and relation extraction optimized for medical terminology, (2) multi-dimensional decision relationship reconstruction from unstructured medical texts, and (3) human-guided reasoning for knowledge expansion. KG4Diagnosis serves as an extensible foundation for specialized medical diagnosis systems, with capabilities to incorporate new diseases and medical knowledge. The framework's modular design enables seamless integration of domain-specific enhancements, making it valuable for developing targeted medical diagnosis systems. We provide architectural guidelines and protocols to facilitate adoption across medical contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16833v2-abstract-full').style.display = 'none'; document.getElementById('2412.16833v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages,5 figures,published to AAAI-25 Bridge Program</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07998">arXiv:2412.07998</a> <span> [<a href="https://arxiv.org/pdf/2412.07998">pdf</a>, <a href="https://arxiv.org/ps/2412.07998">ps</a>, <a href="https://arxiv.org/format/2412.07998">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> RALI@TREC iKAT 2024: Achieving Personalization via Retrieval Fusion in Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hui%2C+Y">Yuchen Hui</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+M">Milan Mao</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07998v1-abstract-short" style="display: inline;"> The Recherche Appliquee en Linguistique Informatique (RALI) team participated in the 2024 TREC Interactive Knowledge Assistance (iKAT) Track. In personalized conversational search, effectively capturing a user's complex search intent requires incorporating both contextual information and key elements from the user profile into query reformulation. The user profile often contains many relevant piec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07998v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07998v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07998v1-abstract-full" style="display: none;"> The Recherche Appliquee en Linguistique Informatique (RALI) team participated in the 2024 TREC Interactive Knowledge Assistance (iKAT) Track. In personalized conversational search, effectively capturing a user's complex search intent requires incorporating both contextual information and key elements from the user profile into query reformulation. The user profile often contains many relevant pieces, and each could potentially complement the user's information needs. It is difficult to disregard any of them, whereas introducing an excessive number of these pieces risks drifting from the original query and hinders search performance. This is a challenge we denote as over-personalization. To address this, we propose different strategies by fusing ranking lists generated from the queries with different levels of personalization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07998v1-abstract-full').style.display = 'none'; document.getElementById('2412.07998v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work presented at NIST Text Retrieval Conference 2024. https://www.nist.gov/news-events/events/2024/11/trec2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01561">arXiv:2411.01561</a> <span> [<a href="https://arxiv.org/pdf/2411.01561">pdf</a>, <a href="https://arxiv.org/format/2411.01561">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Graph Neural Network for Recommendation with Dynamic De-redundancy and Modality-Guided Feature De-noisy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Feng Mo</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lin Xiao</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Q">Qiya Song</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xieping Gao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+E">Eryao Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01561v1-abstract-short" style="display: inline;"> Graph neural networks (GNNs) have become crucial in multimodal recommendation tasks because of their powerful ability to capture complex relationships between neighboring nodes. However, increasing the number of propagation layers in GNNs can lead to feature redundancy, which may negatively impact the overall recommendation performance. In addition, the existing recommendation task method directly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01561v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01561v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01561v1-abstract-full" style="display: none;"> Graph neural networks (GNNs) have become crucial in multimodal recommendation tasks because of their powerful ability to capture complex relationships between neighboring nodes. However, increasing the number of propagation layers in GNNs can lead to feature redundancy, which may negatively impact the overall recommendation performance. In addition, the existing recommendation task method directly maps the preprocessed multimodal features to the low-dimensional space, which will bring the noise unrelated to user preference, thus affecting the representation ability of the model. To tackle the aforementioned challenges, we propose Multimodal Graph Neural Network for Recommendation (MGNM) with Dynamic De-redundancy and Modality-Guided Feature De-noisy, which is divided into local and global interaction. Initially, in the local interaction process,we integrate a dynamic de-redundancy (DDR) loss function which is achieved by utilizing the product of the feature coefficient matrix and the feature matrix as a penalization factor. It reduces the feature redundancy effects of multimodal and behavioral features caused by the stacking of multiple GNN layers. Subsequently, in the global interaction process, we developed modality-guided global feature purifiers for each modality to alleviate the impact of modality noise. It is a two-fold guiding mechanism eliminating modality features that are irrelevant to user preferences and captures complex relationships within the modality. Experimental results demonstrate that MGNM achieves superior performance on multimodal information denoising and removal of redundant information compared to the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01561v1-abstract-full').style.display = 'none'; document.getElementById('2411.01561v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24155">arXiv:2410.24155</a> <span> [<a href="https://arxiv.org/pdf/2410.24155">pdf</a>, <a href="https://arxiv.org/format/2410.24155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Blind Spot Navigation in LLM Reasoning with Thought Space Explorer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jinghan Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiting Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kunpeng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.24155v2-abstract-short" style="display: inline;"> Recent advances in large language models (LLMs) have demonstrated their potential in handling complex reasoning tasks, which are usually achieved by constructing a thought chain to guide the model to solve the problem with multi-step thinking. However, existing methods often remain confined to previously explored solution spaces and thus overlook the critical blind spot within LLMs' cognitive rang… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24155v2-abstract-full').style.display = 'inline'; document.getElementById('2410.24155v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.24155v2-abstract-full" style="display: none;"> Recent advances in large language models (LLMs) have demonstrated their potential in handling complex reasoning tasks, which are usually achieved by constructing a thought chain to guide the model to solve the problem with multi-step thinking. However, existing methods often remain confined to previously explored solution spaces and thus overlook the critical blind spot within LLMs' cognitive range. To address these issues, we design the Thought Space Explorer (TSE), a novel framework to expand and optimize thought structures to guide LLMs to explore their blind spots of thinking. By generating new reasoning steps and branches based on the original thought structure with various designed strategies, TSE broadens the thought space and alleviates the impact of blind spots for LLM reasoning. Experimental results on multiple levels of reasoning tasks demonstrate the efficacy of TSE. We also conduct extensive analysis to understand how structured and expansive thought can contribute to unleashing the potential of LLM reasoning capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.24155v2-abstract-full').style.display = 'none'; document.getElementById('2410.24155v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15576">arXiv:2410.15576</a> <span> [<a href="https://arxiv.org/pdf/2410.15576">pdf</a>, <a href="https://arxiv.org/format/2410.15576">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ziliang Zhao</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+H">Hongjin Qian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haonan Chen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yiruo Cheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoxi Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yutao Zhu</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+Z">Zhicheng Dou</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15576v1-abstract-short" style="display: inline;"> As a cornerstone of modern information access, search engines have become indispensable in everyday life. With the rapid advancements in AI and natural language processing (NLP) technologies, particularly large language models (LLMs), search engines have evolved to support more intuitive and intelligent interactions between users and systems. Conversational search, an emerging paradigm for next-ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15576v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15576v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15576v1-abstract-full" style="display: none;"> As a cornerstone of modern information access, search engines have become indispensable in everyday life. With the rapid advancements in AI and natural language processing (NLP) technologies, particularly large language models (LLMs), search engines have evolved to support more intuitive and intelligent interactions between users and systems. Conversational search, an emerging paradigm for next-generation search engines, leverages natural language dialogue to facilitate complex and precise information retrieval, thus attracting significant attention. Unlike traditional keyword-based search engines, conversational search systems enhance user experience by supporting intricate queries, maintaining context over multi-turn interactions, and providing robust information integration and processing capabilities. Key components such as query reformulation, search clarification, conversational retrieval, and response generation work in unison to enable these sophisticated interactions. In this survey, we explore the recent advancements and potential future directions in conversational search, examining the critical modules that constitute a conversational search system. We highlight the integration of LLMs in enhancing these systems and discuss the challenges and opportunities that lie ahead in this dynamic field. Additionally, we provide insights into real-world applications and robust evaluations of current conversational search systems, aiming to guide future research and development in conversational search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15576v1-abstract-full').style.display = 'none'; document.getElementById('2410.15576v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, 8 figures, continue to update</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03212">arXiv:2410.03212</a> <span> [<a href="https://arxiv.org/pdf/2410.03212">pdf</a>, <a href="https://arxiv.org/format/2410.03212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Data-Efficient Massive Tool Retrieval: A Reinforcement Learning Approach for Query-Tool Alignment with Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xin Fan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junjie Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chongxian Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Sakai%2C+T">Tetsuya Sakai</a>, <a href="/search/cs?searchtype=author&query=Yamana%2C+H">Hayato Yamana</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03212v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) integrated with external tools and APIs have successfully addressed complex tasks by using in-context learning or fine-tuning. Despite this progress, the vast scale of tool retrieval remains challenging due to stringent input length constraints. In response, we propose a pre-retrieval strategy from an extensive repository, effectively framing the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03212v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03212v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03212v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) integrated with external tools and APIs have successfully addressed complex tasks by using in-context learning or fine-tuning. Despite this progress, the vast scale of tool retrieval remains challenging due to stringent input length constraints. In response, we propose a pre-retrieval strategy from an extensive repository, effectively framing the problem as the massive tool retrieval (MTR) task. We introduce the MTRB (massive tool retrieval benchmark) to evaluate real-world tool-augmented LLM scenarios with a large number of tools. This benchmark is designed for low-resource scenarios and includes a diverse collection of tools with descriptions refined for consistency and clarity. It consists of three subsets, each containing 90 test samples and 10 training samples. To handle the low-resource MTR task, we raise a new query-tool alignment (QTA) framework leverages LLMs to enhance query-tool alignment by rewriting user queries through ranking functions and the direct preference optimization (DPO) method. This approach consistently outperforms existing state-of-the-art models in top-5 and top-10 retrieval tasks across the MTRB benchmark, with improvements up to 93.28% based on the metric Sufficiency@k, which measures the adequacy of tool retrieval within the first k results. Furthermore, ablation studies validate the efficacy of our framework, highlighting its capacity to optimize performance even with limited annotated samples. Specifically, our framework achieves up to 78.53% performance improvement in Sufficiency@k with just a single annotated sample. Additionally, QTA exhibits strong cross-dataset generalizability, emphasizing its potential for real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03212v1-abstract-full').style.display = 'none'; document.getElementById('2410.03212v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17508">arXiv:2409.17508</a> <span> [<a href="https://arxiv.org/pdf/2409.17508">pdf</a>, <a href="https://arxiv.org/format/2409.17508">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Uni-Med: A Unified Medical Generalist Foundation Model For Multi-Task Learning Via Connector-MoE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xun Zhu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Ying Hu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanbin Mo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Miao Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Ji Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17508v2-abstract-short" style="display: inline;"> Multi-modal large language models (MLLMs) have shown impressive capabilities as a general-purpose interface for various visual and linguistic tasks. However, building a unified MLLM for multi-task learning in the medical field remains a thorny challenge. To mitigate the tug-of-war problem of multi-modal multi-task optimization in MLLMs, recent advances primarily focus on improving the LLM componen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17508v2-abstract-full').style.display = 'inline'; document.getElementById('2409.17508v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17508v2-abstract-full" style="display: none;"> Multi-modal large language models (MLLMs) have shown impressive capabilities as a general-purpose interface for various visual and linguistic tasks. However, building a unified MLLM for multi-task learning in the medical field remains a thorny challenge. To mitigate the tug-of-war problem of multi-modal multi-task optimization in MLLMs, recent advances primarily focus on improving the LLM components, while neglecting the connector that bridges the gap between modalities. In this paper, we introduce Uni-Med, a novel medical generalist foundation model which consists of a universal visual feature extraction module, a connector mixture-of-experts (CMoE) module, and an LLM. Benefiting from the proposed CMoE that leverages a well-designed router with a mixture of projection experts at the connector, Uni-Med achieves efficient solution to the tug-of-war problem and can perform six different medical tasks including question answering, visual question answering, report generation, referring expression comprehension, referring expression generation and image classification. To the best of our knowledge, Uni-Med is the first effort to tackle multi-task interference at the connector in MLLMs. Extensive ablation experiments validate the effectiveness of introducing CMoE under any configuration, with up to an average 8% performance gains. We further provide interpretation analysis of the tug-of-war problem from the perspective of gradient optimization and parameter statistics. Compared to previous state-of-the-art medical MLLMs, Uni-Med achieves competitive or superior evaluation metrics on diverse tasks. Code and resources are available at https://github.com/tsinghua-msiip/Uni-Med. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17508v2-abstract-full').style.display = 'none'; document.getElementById('2409.17508v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20189">arXiv:2407.20189</a> <span> [<a href="https://arxiv.org/pdf/2407.20189">pdf</a>, <a href="https://arxiv.org/format/2407.20189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Aligning Query Representation with Rewritten Query and Relevance Judgments in Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+C">Chen Qu</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihong Wu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhan Su</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20189v1-abstract-short" style="display: inline;"> Conversational search supports multi-turn user-system interactions to solve complex information needs. Different from the traditional single-turn ad-hoc search, conversational search encounters a more challenging problem of context-dependent query understanding with the lengthy and long-tail conversational history context. While conversational query rewriting methods leverage explicit rewritten qu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20189v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20189v1-abstract-full" style="display: none;"> Conversational search supports multi-turn user-system interactions to solve complex information needs. Different from the traditional single-turn ad-hoc search, conversational search encounters a more challenging problem of context-dependent query understanding with the lengthy and long-tail conversational history context. While conversational query rewriting methods leverage explicit rewritten queries to train a rewriting model to transform the context-dependent query into a stand-stone search query, this is usually done without considering the quality of search results. Conversational dense retrieval methods use fine-tuning to improve a pre-trained ad-hoc query encoder, but they are limited by the conversational search data available for training. In this paper, we leverage both rewritten queries and relevance judgments in the conversational search data to train a better query representation model. The key idea is to align the query representation with those of rewritten queries and relevant documents. The proposed model -- Query Representation Alignment Conversational Dense Retriever, QRACDR, is tested on eight datasets, including various settings in conversational search and ad-hoc search. The results demonstrate the strong performance of QRACDR compared with state-of-the-art methods, and confirm the effectiveness of representation alignment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20189v1-abstract-full').style.display = 'none'; document.getElementById('2407.20189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CIKM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16192">arXiv:2407.16192</a> <span> [<a href="https://arxiv.org/pdf/2407.16192">pdf</a>, <a href="https://arxiv.org/format/2407.16192">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> How to Leverage Personal Textual Knowledge for Personalized Conversational Information Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Longxiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yue Dong</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">Degen Huang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16192v1-abstract-short" style="display: inline;"> Personalized conversational information retrieval (CIR) combines conversational and personalizable elements to satisfy various users' complex information needs through multi-turn interaction based on their backgrounds. The key promise is that the personal textual knowledge base (PTKB) can improve the CIR effectiveness because the retrieval results can be more related to the user's background. Howe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16192v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16192v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16192v1-abstract-full" style="display: none;"> Personalized conversational information retrieval (CIR) combines conversational and personalizable elements to satisfy various users' complex information needs through multi-turn interaction based on their backgrounds. The key promise is that the personal textual knowledge base (PTKB) can improve the CIR effectiveness because the retrieval results can be more related to the user's background. However, PTKB is noisy: not every piece of knowledge in PTKB is relevant to the specific query at hand. In this paper, we explore and test several ways to select knowledge from PTKB and use it for query reformulation by using a large language model (LLM). The experimental results show the PTKB might not always improve the search results when used alone, but LLM can help generate a more appropriate personalized query when high-quality guidance is provided. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16192v1-abstract-full').style.display = 'none'; document.getElementById('2407.16192v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CIKM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02719">arXiv:2407.02719</a> <span> [<a href="https://arxiv.org/pdf/2407.02719">pdf</a>, <a href="https://arxiv.org/format/2407.02719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Boosting Biomedical Concept Extraction by Rule-Based Data Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+Q">Qiwei Shao</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02719v1-abstract-short" style="display: inline;"> Document-level biomedical concept extraction is the task of identifying biomedical concepts mentioned in a given document. Recent advancements have adapted pre-trained language models for this task. However, the scarcity of domain-specific data and the deviation of concepts from their canonical names often hinder these models' effectiveness. To tackle this issue, we employ MetaMapLite, an existing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02719v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02719v1-abstract-full" style="display: none;"> Document-level biomedical concept extraction is the task of identifying biomedical concepts mentioned in a given document. Recent advancements have adapted pre-trained language models for this task. However, the scarcity of domain-specific data and the deviation of concepts from their canonical names often hinder these models' effectiveness. To tackle this issue, we employ MetaMapLite, an existing rule-based concept mapping system, to generate additional pseudo-annotated data from PubMed and PMC. The annotated data are used to augment the limited training data. Through extensive experiments, this study demonstrates the utility of a manually crafted concept mapping tool for training a better concept extraction model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02719v1-abstract-full').style.display = 'none'; document.getElementById('2407.02719v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00904">arXiv:2407.00904</a> <span> [<a href="https://arxiv.org/pdf/2407.00904">pdf</a>, <a href="https://arxiv.org/format/2407.00904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Background-aware Multi-source Fusion Financial Trend Forecasting Mechanism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengting Mo</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+S">Shanshan Yan</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yinhao Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00904v1-abstract-short" style="display: inline;"> Stock prices, as an economic indicator, reflect changes in economic development and market conditions. Traditional stock price prediction models often only consider time-series data and are limited by the mechanisms of the models themselves. Some deep learning models have high computational costs, depend on a large amount of high-quality data, and have poor interpretations, making it difficult to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00904v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00904v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00904v1-abstract-full" style="display: none;"> Stock prices, as an economic indicator, reflect changes in economic development and market conditions. Traditional stock price prediction models often only consider time-series data and are limited by the mechanisms of the models themselves. Some deep learning models have high computational costs, depend on a large amount of high-quality data, and have poor interpretations, making it difficult to intuitively understand the driving factors behind the predictions. Some studies have used deep learning models to extract text features and combine them with price data to make joint predictions, but there are issues with dealing with information noise, accurate extraction of text sentiment, and how to efficiently fuse text and numerical data. To address these issues in this paper, we propose a background-aware multi-source fusion financial trend forecasting mechanism. The system leverages a large language model to extract key information from policy and stock review texts, utilizing the MacBERT model to generate feature vectors. These vectors are then integrated with stock price data to form comprehensive feature representations. These integrated features are input into a neural network comprising various deep learning architectures. By integrating multiple data sources, the system offers a holistic view of market dynamics. It harnesses the comprehensive analytical and interpretative capabilities of large language models, retaining deep semantic and sentiment information from policy texts to provide richer input features for stock trend prediction. Additionally, we compare the accuracy of six models (LSTM, BiLSTM, MogrifierLSTM, GRU, ST-LSTM, SwinLSTM). The results demonstrate that our system achieves generally better accuracy in predicting stock movements, attributed to the incorporation of large language model processing, policy information, and other influential features. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00904v1-abstract-full').style.display = 'none'; document.getElementById('2407.00904v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13996">arXiv:2406.13996</a> <span> [<a href="https://arxiv.org/pdf/2406.13996">pdf</a>, <a href="https://arxiv.org/ps/2406.13996">ps</a>, <a href="https://arxiv.org/format/2406.13996">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671840">10.1145/3637528.3671840 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Unifying Graph Convolution and Contrastive Learning in Collaborative Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihong Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Le Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tianyu Zhu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Weizhi Ma</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13996v2-abstract-short" style="display: inline;"> Graph-based models and contrastive learning have emerged as prominent methods in Collaborative Filtering (CF). While many existing models in CF incorporate these methods in their design, there seems to be a limited depth of analysis regarding the foundational principles behind them. This paper bridges graph convolution, a pivotal element of graph-based models, with contrastive learning through a t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13996v2-abstract-full').style.display = 'inline'; document.getElementById('2406.13996v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13996v2-abstract-full" style="display: none;"> Graph-based models and contrastive learning have emerged as prominent methods in Collaborative Filtering (CF). While many existing models in CF incorporate these methods in their design, there seems to be a limited depth of analysis regarding the foundational principles behind them. This paper bridges graph convolution, a pivotal element of graph-based models, with contrastive learning through a theoretical framework. By examining the learning dynamics and equilibrium of the contrastive loss, we offer a fresh lens to understand contrastive learning via graph theory, emphasizing its capability to capture high-order connectivity. Building on this analysis, we further show that the graph convolutional layers often used in graph-based models are not essential for high-order connectivity modeling and might contribute to the risk of oversmoothing. Stemming from our findings, we introduce Simple Contrastive Collaborative Filtering (SCCF), a simple and effective algorithm based on a naive embedding model and a modified contrastive loss. The efficacy of the algorithm is demonstrated through extensive experiments across four public datasets. The experiment code is available at \url{https://github.com/wu1hong/SCCF}. \end{abstract} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13996v2-abstract-full').style.display = 'none'; document.getElementById('2406.13996v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">KDD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11177">arXiv:2406.11177</a> <span> [<a href="https://arxiv.org/pdf/2406.11177">pdf</a>, <a href="https://arxiv.org/format/2406.11177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Retrieval-Augmented Feature Generation for Domain-Specific Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinhao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jinghan Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuzhong Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kunpeng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11177v2-abstract-short" style="display: inline;"> Feature generation can significantly enhance learning outcomes, particularly for tasks with limited data. An effective way to improve feature generation is by expanding the current feature space using existing features and enriching the informational content. However, generating new, interpretable features in application fields often requires domain-specific knowledge about the existing features.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11177v2-abstract-full').style.display = 'inline'; document.getElementById('2406.11177v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11177v2-abstract-full" style="display: none;"> Feature generation can significantly enhance learning outcomes, particularly for tasks with limited data. An effective way to improve feature generation is by expanding the current feature space using existing features and enriching the informational content. However, generating new, interpretable features in application fields often requires domain-specific knowledge about the existing features. This paper introduces a new method RAFG for generating reasonable and explainable features specific to domain classification tasks. To generate new features with interpretability in domain knowledge, we perform information retrieval on existing features to identify potential feature associations, and utilize these associations to generate meaningful features. Furthermore, we develop a Large Language Model (LLM)-based framework for feature generation with reasoning to verify and filter features during the generation process. Experiments across several datasets in medical, economic, and geographic domains show that our RAFG method produces high-quality, meaningful features and significantly improves classification performance compared with baseline methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11177v2-abstract-full').style.display = 'none'; document.getElementById('2406.11177v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05013">arXiv:2406.05013</a> <span> [<a href="https://arxiv.org/pdf/2406.05013">pdf</a>, <a href="https://arxiv.org/format/2406.05013">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CHIQ: Contextual History Enhancement for Improving Query Rewriting in Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Ghaddar%2C+A">Abbas Ghaddar</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Rezagholizadeh%2C+M">Mehdi Rezagholizadeh</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boxing Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qun Liu</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05013v2-abstract-short" style="display: inline;"> In this paper, we study how open-source large language models (LLMs) can be effectively deployed for improving query rewriting in conversational search, especially for ambiguous queries. We introduce CHIQ, a two-step method that leverages the capabilities of LLMs to resolve ambiguities in the conversation history before query rewriting. This approach contrasts with prior studies that predominantly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05013v2-abstract-full').style.display = 'inline'; document.getElementById('2406.05013v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05013v2-abstract-full" style="display: none;"> In this paper, we study how open-source large language models (LLMs) can be effectively deployed for improving query rewriting in conversational search, especially for ambiguous queries. We introduce CHIQ, a two-step method that leverages the capabilities of LLMs to resolve ambiguities in the conversation history before query rewriting. This approach contrasts with prior studies that predominantly use closed-source LLMs to directly generate search queries from conversation history. We demonstrate on five well-established benchmarks that CHIQ leads to state-of-the-art results across most settings, showing highly competitive performances with systems leveraging closed-source LLMs. Our study provides a first step towards leveraging open-source LLMs in conversational search, as a competitive alternative to the prevailing reliance on commercial LLMs. Data, models, and source code will be publicly available upon acceptance at https://github.com/fengranMark/CHIQ. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05013v2-abstract-full').style.display = 'none'; document.getElementById('2406.05013v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17357">arXiv:2405.17357</a> <span> [<a href="https://arxiv.org/pdf/2405.17357">pdf</a>, <a href="https://arxiv.org/format/2405.17357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DoRA: Enhancing Parameter-Efficient Fine-Tuning with Dynamic Rank Distribution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+Y">Yulong Mao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+C">Changhao Guan</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+G">Ganglin Bao</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jinan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17357v3-abstract-short" style="display: inline;"> Fine-tuning large-scale pre-trained models is inherently a resource-intensive task. While it can enhance the capabilities of the model, it also incurs substantial computational costs, posing challenges to the practical application of downstream tasks. Existing parameter-efficient fine-tuning (PEFT) methods such as Low-Rank Adaptation (LoRA) rely on a bypass framework that ignores the differential… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17357v3-abstract-full').style.display = 'inline'; document.getElementById('2405.17357v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17357v3-abstract-full" style="display: none;"> Fine-tuning large-scale pre-trained models is inherently a resource-intensive task. While it can enhance the capabilities of the model, it also incurs substantial computational costs, posing challenges to the practical application of downstream tasks. Existing parameter-efficient fine-tuning (PEFT) methods such as Low-Rank Adaptation (LoRA) rely on a bypass framework that ignores the differential parameter budget requirements across weight matrices, which may lead to suboptimal fine-tuning outcomes. To address this issue, we introduce the Dynamic Low-Rank Adaptation (DoRA) method. DoRA decomposes high-rank LoRA layers into structured single-rank components, allowing for dynamic pruning of parameter budget based on their importance to specific tasks during training, which makes the most of the limited parameter budget. Experimental results demonstrate that DoRA can achieve competitive performance compared with LoRA and full model fine-tuning, and outperform various strong baselines with the same storage parameter budget. Our code is available at https://github.com/MIkumikumi0116/DoRA <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17357v3-abstract-full').style.display = 'none'; document.getElementById('2405.17357v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the main conference of ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16671">arXiv:2405.16671</a> <span> [<a href="https://arxiv.org/pdf/2405.16671">pdf</a>, <a href="https://arxiv.org/format/2405.16671">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Mixture of Latent Experts Using Tensor Products </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhan Su</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Tiwari%2C+P">Prayag Tiwari</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Benyou Wang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Simonsen%2C+J+G">Jakob Grue Simonsen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16671v2-abstract-short" style="display: inline;"> In multi-task learning, the conventional approach involves training a model on multiple tasks simultaneously. However, the training signals from different tasks can interfere with one another, potentially leading to \textit{negative transfer}. To mitigate this, we investigate if modular language models can facilitate positive transfer and systematic generalization. Specifically, we propose a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16671v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16671v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16671v2-abstract-full" style="display: none;"> In multi-task learning, the conventional approach involves training a model on multiple tasks simultaneously. However, the training signals from different tasks can interfere with one another, potentially leading to \textit{negative transfer}. To mitigate this, we investigate if modular language models can facilitate positive transfer and systematic generalization. Specifically, we propose a novel modular language model (\texttt{TensorPoly}), that balances parameter efficiency with nuanced routing methods. For \textit{modules}, we reparameterize Low-Rank Adaptation (\texttt{LoRA}) by employing an entangled tensor through the use of tensor product operations and name the resulting approach \texttt{TLoRA}. For \textit{routing function}, we tailor two innovative routing functions according to the granularity: \texttt{TensorPoly-I} which directs to each rank within the entangled tensor while \texttt{TensorPoly-II} offers a finer-grained routing approach targeting each order of the entangled tensor. The experimental results from the multi-task T0-benchmark demonstrate that: 1) all modular LMs surpass the corresponding dense approaches, highlighting the potential of modular language models to mitigate negative inference in multi-task learning and deliver superior outcomes. 2) \texttt{TensorPoly-I} achieves higher parameter efficiency in adaptation and outperforms other modular LMs, which shows the potential of our approach in multi-task transfer learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16671v2-abstract-full').style.display = 'none'; document.getElementById('2405.16671v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://github.com/microsoft/mttl/tree/zs_code</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10936">arXiv:2405.10936</a> <span> [<a href="https://arxiv.org/pdf/2405.10936">pdf</a>, <a href="https://arxiv.org/format/2405.10936">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Large Language Models with Multilingualism: Recent Advances and New Frontiers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongliang Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">You Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuanchi Zhang</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+W">Weijian Yi</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+Y">Yulong Mao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinchen Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuzhuang Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jinan Xu</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10936v2-abstract-short" style="display: inline;"> The rapid development of Large Language Models (LLMs) demonstrates remarkable multilingual capabilities in natural language processing, attracting global attention in both academia and industry. To mitigate potential discrimination and enhance the overall usability and accessibility for diverse language user groups, it is important for the development of language-fair technology. Despite the break… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10936v2-abstract-full').style.display = 'inline'; document.getElementById('2405.10936v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10936v2-abstract-full" style="display: none;"> The rapid development of Large Language Models (LLMs) demonstrates remarkable multilingual capabilities in natural language processing, attracting global attention in both academia and industry. To mitigate potential discrimination and enhance the overall usability and accessibility for diverse language user groups, it is important for the development of language-fair technology. Despite the breakthroughs of LLMs, the investigation into the multilingual scenario remains insufficient, where a comprehensive survey to summarize recent approaches, developments, limitations, and potential solutions is desirable. To this end, we provide a survey with multiple perspectives on the utilization of LLMs in the multilingual scenario. We first rethink the transitions between previous and current research on pre-trained language models. Then we introduce several perspectives on the multilingualism of LLMs, including training and inference methods, information retrieval, model security, multi-domain with language culture, and usage of datasets. We also discuss the major challenges that arise in these aspects, along with possible solutions. Besides, we highlight future research directions that aim at further enhancing LLMs with multilingualism. The survey aims to help the research community address multilingual problems and provide a comprehensive understanding of the core concepts, key techniques, and latest developments in multilingual natural language processing based on LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10936v2-abstract-full').style.display = 'none'; document.getElementById('2405.10936v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">65 pages, Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06545">arXiv:2405.06545</a> <span> [<a href="https://arxiv.org/pdf/2405.06545">pdf</a>, <a href="https://arxiv.org/format/2405.06545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Hallucinations in Large Language Models via Self-Refinement-Enhanced Knowledge Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Niu%2C+M">Mengjia Niu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jie Shi</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06545v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable capabilities across various domains, although their susceptibility to hallucination poses significant challenges for their deployment in critical areas such as healthcare. To address this issue, retrieving relevant facts from knowledge graphs (KGs) is considered a promising method. Existing KG-augmented approaches tend to be resource-intens… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06545v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06545v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable capabilities across various domains, although their susceptibility to hallucination poses significant challenges for their deployment in critical areas such as healthcare. To address this issue, retrieving relevant facts from knowledge graphs (KGs) is considered a promising method. Existing KG-augmented approaches tend to be resource-intensive, requiring multiple rounds of retrieval and verification for each factoid, which impedes their application in real-world scenarios. In this study, we propose Self-Refinement-Enhanced Knowledge Graph Retrieval (Re-KGR) to augment the factuality of LLMs' responses with less retrieval efforts in the medical field. Our approach leverages the attribution of next-token predictive probability distributions across different tokens, and various model layers to primarily identify tokens with a high potential for hallucination, reducing verification rounds by refining knowledge triples associated with these tokens. Moreover, we rectify inaccurate content using retrieved knowledge in the post-processing stage, which improves the truthfulness of generated responses. Experimental results on a medical dataset demonstrate that our approach can enhance the factual capability of LLMs across various foundational models as evidenced by the highest scores on truthfulness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06545v1-abstract-full').style.display = 'none'; document.getElementById('2405.06545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7; H.3.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04590">arXiv:2405.04590</a> <span> [<a href="https://arxiv.org/pdf/2405.04590">pdf</a>, <a href="https://arxiv.org/format/2405.04590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Language Modeling Using Tensor Trains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhan Su</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuqin Zhou</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Simonsen%2C+J+G">Jakob Grue Simonsen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04590v1-abstract-short" style="display: inline;"> We propose a novel tensor network language model based on the simplest tensor network (i.e., tensor trains), called `Tensor Train Language Model' (TTLM). TTLM represents sentences in an exponential space constructed by the tensor product of words, but computing the probabilities of sentences in a low-dimensional fashion. We demonstrate that the architectures of Second-order RNNs, Recurrent Arithme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04590v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04590v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04590v1-abstract-full" style="display: none;"> We propose a novel tensor network language model based on the simplest tensor network (i.e., tensor trains), called `Tensor Train Language Model' (TTLM). TTLM represents sentences in an exponential space constructed by the tensor product of words, but computing the probabilities of sentences in a low-dimensional fashion. We demonstrate that the architectures of Second-order RNNs, Recurrent Arithmetic Circuits (RACs), and Multiplicative Integration RNNs are, essentially, special cases of TTLM. Experimental evaluations on real language modeling tasks show that the proposed variants of TTLM (i.e., TTLM-Large and TTLM-Tiny) outperform the vanilla Recurrent Neural Networks (RNNs) with low-scale of hidden units. (The code is available at https://github.com/shuishen112/tensortrainlm.) <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04590v1-abstract-full').style.display = 'none'; document.getElementById('2405.04590v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16037">arXiv:2404.16037</a> <span> [<a href="https://arxiv.org/pdf/2404.16037">pdf</a>, <a href="https://arxiv.org/format/2404.16037">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> VN-Net: Vision-Numerical Fusion Graph Convolutional Network for Sparse Spatio-Temporal Meteorological Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiong%2C+Y">Yutong Xiong</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xun Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+M">Ming Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Weiqing Li</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanbin Mo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chuang Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16037v1-abstract-short" style="display: inline;"> Sparse meteorological forecasting is indispensable for fine-grained weather forecasting and deserves extensive attention. Recent studies have highlighted the potential of spatio-temporal graph convolutional networks (ST-GCNs) in predicting numerical data from ground weather stations. However, as one of the highest fidelity and lowest latency data, the application of the vision data from satellites… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16037v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16037v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16037v1-abstract-full" style="display: none;"> Sparse meteorological forecasting is indispensable for fine-grained weather forecasting and deserves extensive attention. Recent studies have highlighted the potential of spatio-temporal graph convolutional networks (ST-GCNs) in predicting numerical data from ground weather stations. However, as one of the highest fidelity and lowest latency data, the application of the vision data from satellites in ST-GCNs remains unexplored. There are few studies to demonstrate the effectiveness of combining the above multi-modal data for sparse meteorological forecasting. Towards this objective, we introduce Vision-Numerical Fusion Graph Convolutional Network (VN-Net), which mainly utilizes: 1) Numerical-GCN (N-GCN) to adaptively model the static and dynamic patterns of spatio-temporal numerical data; 2) Vision-LSTM Network (V-LSTM) to capture multi-scale joint channel and spatial features from time series satellite images; 4) a GCN-based decoder to generate hourly predictions of specified meteorological factors. As far as we know, VN-Net is the first attempt to introduce GCN method to utilize multi-modal data for better handling sparse spatio-temporal meteorological forecasting. Our experiments on Weather2k dataset show VN-Net outperforms state-of-the-art by a significant margin on mean absolute error (MAE) and root mean square error (RMSE) for temperature, relative humidity, and visibility forecasting. Furthermore, we conduct interpretation analysis and design quantitative evaluation metrics to assess the impact of incorporating vision data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16037v1-abstract-full').style.display = 'none'; document.getElementById('2404.16037v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13940">arXiv:2404.13940</a> <span> [<a href="https://arxiv.org/pdf/2404.13940">pdf</a>, <a href="https://arxiv.org/format/2404.13940">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A User-Centric Multi-Intent Benchmark for Evaluating Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiayin Wang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Weizhi Ma</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+P">Peijie Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Min Zhang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13940v3-abstract-short" style="display: inline;"> Large language models (LLMs) are essential tools that users employ across various scenarios, so evaluating their performance and guiding users in selecting the suitable service is important. Although many benchmarks exist, they mainly focus on specific predefined model abilities, such as world knowledge, reasoning, etc. Based on these ability scores, it is hard for users to determine which LLM bes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13940v3-abstract-full').style.display = 'inline'; document.getElementById('2404.13940v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13940v3-abstract-full" style="display: none;"> Large language models (LLMs) are essential tools that users employ across various scenarios, so evaluating their performance and guiding users in selecting the suitable service is important. Although many benchmarks exist, they mainly focus on specific predefined model abilities, such as world knowledge, reasoning, etc. Based on these ability scores, it is hard for users to determine which LLM best suits their particular needs. To address these issues, we propose to evaluate LLMs from a user-centric perspective and design this benchmark to measure their efficacy in satisfying user needs under distinct intents. Firstly, we collect 1,846 real-world use cases from a user study with 712 participants from 23 countries. This first-hand data helps us understand actual user intents and needs in LLM interactions, forming the User Reported Scenarios (URS) dataset, which is categorized with six types of user intents. Secondly, based on this authentic dataset, we benchmark 10 LLM services with GPT-4-as-Judge. Thirdly, we show that benchmark scores align well with human preference in both real-world experience and pair-wise annotations, achieving Pearson correlations of 0.95 and 0.94, respectively. This alignment confirms that the URS dataset and our evaluation method establish an effective user-centric benchmark. The dataset, code, and process data are available at https://github.com/Alice1998/URS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13940v3-abstract-full').style.display = 'none'; document.getElementById('2404.13940v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13556">arXiv:2404.13556</a> <span> [<a href="https://arxiv.org/pdf/2404.13556">pdf</a>, <a href="https://arxiv.org/format/2404.13556">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ChatRetriever: Adapting Large Language Models for Generalized and Robust Conversational Dense Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+C">Chenlong Deng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haonan Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zheng Liu</a>, <a href="/search/cs?searchtype=author&query=Sakai%2C+T">Tetsuya Sakai</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+Z">Zhicheng Dou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13556v1-abstract-short" style="display: inline;"> Conversational search requires accurate interpretation of user intent from complex multi-turn contexts. This paper presents ChatRetriever, which inherits the strong generalization capability of large language models to robustly represent complex conversational sessions for dense retrieval. To achieve this, we propose a simple and effective dual-learning approach that adapts LLM for retrieval via c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13556v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13556v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13556v1-abstract-full" style="display: none;"> Conversational search requires accurate interpretation of user intent from complex multi-turn contexts. This paper presents ChatRetriever, which inherits the strong generalization capability of large language models to robustly represent complex conversational sessions for dense retrieval. To achieve this, we propose a simple and effective dual-learning approach that adapts LLM for retrieval via contrastive learning while enhancing the complex session understanding through masked instruction tuning on high-quality conversational instruction tuning data. Extensive experiments on five conversational search benchmarks demonstrate that ChatRetriever substantially outperforms existing conversational dense retrievers, achieving state-of-the-art performance on par with LLM-based rewriting approaches. Furthermore, ChatRetriever exhibits superior robustness in handling diverse conversational contexts. Our work highlights the potential of adapting LLMs for retrieval with complex inputs like conversational search sessions and proposes an effective approach to advance this research direction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13556v1-abstract-full').style.display = 'none'; document.getElementById('2404.13556v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09114">arXiv:2404.09114</a> <span> [<a href="https://arxiv.org/pdf/2404.09114">pdf</a>, <a href="https://arxiv.org/format/2404.09114">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Chemical Purification Technique Based on Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wenchao Wu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dongxiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09114v1-abstract-short" style="display: inline;"> We present an innovative of artificial intelligence with column chromatography, aiming to resolve inefficiencies and standardize data collection in chemical separation and purification domain. By developing an automated platform for precise data acquisition and employing advanced machine learning algorithms, we constructed predictive models to forecast key separation parameters, thereby enhancing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09114v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09114v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09114v1-abstract-full" style="display: none;"> We present an innovative of artificial intelligence with column chromatography, aiming to resolve inefficiencies and standardize data collection in chemical separation and purification domain. By developing an automated platform for precise data acquisition and employing advanced machine learning algorithms, we constructed predictive models to forecast key separation parameters, thereby enhancing the efficiency and quality of chromatographic processes. The application of transfer learning allows the model to adapt across various column specifications, broadening its utility. A novel metric, separation probability ($S_p$), quantifies the likelihood of effective compound separation, validated through experimental verification. This study signifies a significant step forward int the application of AI in chemical research, offering a scalable solution to traditional chromatography challenges and providing a foundation for future technological advancements in chemical analysis and purification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09114v1-abstract-full').style.display = 'none'; document.getElementById('2404.09114v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 5 Figures, Submitted to Nature Machine Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11335">arXiv:2403.11335</a> <span> [<a href="https://arxiv.org/pdf/2403.11335">pdf</a>, <a href="https://arxiv.org/format/2403.11335">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ConvSDG: Session Data Generation for Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+B">Bole Yi</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+C">Chen Qu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11335v1-abstract-short" style="display: inline;"> Conversational search provides a more convenient interface for users to search by allowing multi-turn interaction with the search engine. However, the effectiveness of the conversational dense retrieval methods is limited by the scarcity of training data required for their fine-tuning. Thus, generating more training conversational sessions with relevant labels could potentially improve search perf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11335v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11335v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11335v1-abstract-full" style="display: none;"> Conversational search provides a more convenient interface for users to search by allowing multi-turn interaction with the search engine. However, the effectiveness of the conversational dense retrieval methods is limited by the scarcity of training data required for their fine-tuning. Thus, generating more training conversational sessions with relevant labels could potentially improve search performance. Based on the promising capabilities of large language models (LLMs) on text generation, we propose ConvSDG, a simple yet effective framework to explore the feasibility of boosting conversational search by using LLM for session data generation. Within this framework, we design dialogue/session-level and query-level data generation with unsupervised and semi-supervised learning, according to the availability of relevance judgments. The generated data are used to fine-tune the conversational dense retriever. Extensive experiments on four widely used datasets demonstrate the effectiveness and broad applicability of our ConvSDG framework compared with several strong baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11335v1-abstract-full').style.display = 'none'; document.getElementById('2403.11335v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2024 Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.03112">arXiv:2402.03112</a> <span> [<a href="https://arxiv.org/pdf/2402.03112">pdf</a>, <a href="https://arxiv.org/format/2402.03112">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Infrared Spectra Prediction for Diazo Groups Utilizing a Machine Learning Approach with Structural Attention Mechanism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengchun Liu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.03112v1-abstract-short" style="display: inline;"> Infrared (IR) spectroscopy is a pivotal technique in chemical research for elucidating molecular structures and dynamics through vibrational and rotational transitions. However, the intricate molecular fingerprints characterized by unique vibrational and rotational patterns present substantial analytical challenges. Here, we present a machine learning approach employing a Structural Attention Mech… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03112v1-abstract-full').style.display = 'inline'; document.getElementById('2402.03112v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.03112v1-abstract-full" style="display: none;"> Infrared (IR) spectroscopy is a pivotal technique in chemical research for elucidating molecular structures and dynamics through vibrational and rotational transitions. However, the intricate molecular fingerprints characterized by unique vibrational and rotational patterns present substantial analytical challenges. Here, we present a machine learning approach employing a Structural Attention Mechanism tailored to enhance the prediction and interpretation of infrared spectra, particularly for diazo compounds. Our model distinguishes itself by honing in on chemical information proximal to functional groups, thereby significantly bolstering the accuracy, robustness, and interpretability of spectral predictions. This method not only demystifies the correlations between infrared spectral features and molecular structures but also offers a scalable and efficient paradigm for dissecting complex molecular interactions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03112v1-abstract-full').style.display = 'none'; document.getElementById('2402.03112v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16659">arXiv:2401.16659</a> <span> [<a href="https://arxiv.org/pdf/2401.16659">pdf</a>, <a href="https://arxiv.org/format/2401.16659">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> History-Aware Conversational Dense Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+C">Chen Qu</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tianyu Zhu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhan Su</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.16659v3-abstract-short" style="display: inline;"> Conversational search facilitates complex information retrieval by enabling multi-turn interactions between users and the system. Supporting such interactions requires a comprehensive understanding of the conversational inputs to formulate a good search query based on historical information. In particular, the search query should include the relevant information from the previous conversation turn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16659v3-abstract-full').style.display = 'inline'; document.getElementById('2401.16659v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.16659v3-abstract-full" style="display: none;"> Conversational search facilitates complex information retrieval by enabling multi-turn interactions between users and the system. Supporting such interactions requires a comprehensive understanding of the conversational inputs to formulate a good search query based on historical information. In particular, the search query should include the relevant information from the previous conversation turns. However, current approaches for conversational dense retrieval primarily rely on fine-tuning a pre-trained ad-hoc retriever using the whole conversational search session, which can be lengthy and noisy. Moreover, existing approaches are limited by the amount of manual supervision signals in the existing datasets. To address the aforementioned issues, we propose a History-Aware Conversational Dense Retrieval (HAConvDR) system, which incorporates two ideas: context-denoised query reformulation and automatic mining of supervision signals based on the actual impact of historical turns. Experiments on two public conversational search datasets demonstrate the improved history modeling capability of HAConvDR, in particular for long conversations with topic shifts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16659v3-abstract-full').style.display = 'none'; document.getElementById('2401.16659v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13904">arXiv:2401.13904</a> <span> [<a href="https://arxiv.org/pdf/2401.13904">pdf</a>, <a href="https://arxiv.org/format/2401.13904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Empowering Machines to Think Like Chemists: Unveiling Molecular Structure-Polarity Relationships with Hierarchical Symbolic Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lou%2C+S">Siyu Lou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengchun Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuntian Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13904v1-abstract-short" style="display: inline;"> Thin-layer chromatography (TLC) is a crucial technique in molecular polarity analysis. Despite its importance, the interpretability of predictive models for TLC, especially those driven by artificial intelligence, remains a challenge. Current approaches, utilizing either high-dimensional molecular fingerprints or domain-knowledge-driven feature engineering, often face a dilemma between expressiven… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13904v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13904v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13904v1-abstract-full" style="display: none;"> Thin-layer chromatography (TLC) is a crucial technique in molecular polarity analysis. Despite its importance, the interpretability of predictive models for TLC, especially those driven by artificial intelligence, remains a challenge. Current approaches, utilizing either high-dimensional molecular fingerprints or domain-knowledge-driven feature engineering, often face a dilemma between expressiveness and interpretability. To bridge this gap, we introduce Unsupervised Hierarchical Symbolic Regression (UHiSR), combining hierarchical neural networks and symbolic regression. UHiSR automatically distills chemical-intuitive polarity indices, and discovers interpretable equations that link molecular structure to chromatographic behavior. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13904v1-abstract-full').style.display = 'none'; document.getElementById('2401.13904v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03403">arXiv:2401.03403</a> <span> [<a href="https://arxiv.org/pdf/2401.03403">pdf</a>, <a href="https://arxiv.org/format/2401.03403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Deep peak property learning for efficient chiral molecules ECD spectra prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Long%2C+D">Da Long</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Li Yuan</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yonghong Tian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinchang Wang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03403v1-abstract-short" style="display: inline;"> Chiral molecule assignation is crucial for asymmetric catalysis, functional materials, and the drug industry. The conventional approach requires theoretical calculations of electronic circular dichroism (ECD) spectra, which is time-consuming and costly. To speed up this process, we have incorporated deep learning techniques for the ECD prediction. We first set up a large-scale dataset of Chiral Mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03403v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03403v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03403v1-abstract-full" style="display: none;"> Chiral molecule assignation is crucial for asymmetric catalysis, functional materials, and the drug industry. The conventional approach requires theoretical calculations of electronic circular dichroism (ECD) spectra, which is time-consuming and costly. To speed up this process, we have incorporated deep learning techniques for the ECD prediction. We first set up a large-scale dataset of Chiral Molecular ECD spectra (CMCDS) with calculated ECD spectra. We further develop the ECDFormer model, a Transformer-based model to learn the chiral molecular representations and predict corresponding ECD spectra with improved efficiency and accuracy. Unlike other models for spectrum prediction, our ECDFormer creatively focused on peak properties rather than the whole spectrum sequence for prediction, inspired by the scenario of chiral molecule assignation. Specifically, ECDFormer predicts the peak properties, including number, position, and symbol, then renders the ECD spectra from these peak properties, which significantly outperforms other models in ECD prediction, Our ECDFormer reduces the time of acquiring ECD spectra from 1-100 hours per molecule to 1.5s. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03403v1-abstract-full').style.display = 'none'; document.getElementById('2401.03403v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 8 Figures, Submitted to Nature Machine Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.00808">arXiv:2312.00808</a> <span> [<a href="https://arxiv.org/pdf/2312.00808">pdf</a>, <a href="https://arxiv.org/format/2312.00808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Transforming organic chemistry research paradigms: moving from manual efforts to the intersection of automation and artificial intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengchun Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuntian Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.00808v1-abstract-short" style="display: inline;"> Organic chemistry is undergoing a major paradigm shift, moving from a labor-intensive approach to a new era dominated by automation and artificial intelligence (AI). This transformative shift is being driven by technological advances, the ever-increasing demand for greater research efficiency and accuracy, and the burgeoning growth of interdisciplinary research. AI models, supported by computation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00808v1-abstract-full').style.display = 'inline'; document.getElementById('2312.00808v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.00808v1-abstract-full" style="display: none;"> Organic chemistry is undergoing a major paradigm shift, moving from a labor-intensive approach to a new era dominated by automation and artificial intelligence (AI). This transformative shift is being driven by technological advances, the ever-increasing demand for greater research efficiency and accuracy, and the burgeoning growth of interdisciplinary research. AI models, supported by computational power and algorithms, are drastically reshaping synthetic planning and introducing groundbreaking ways to tackle complex molecular synthesis. In addition, autonomous robotic systems are rapidly accelerating the pace of discovery by performing tedious tasks with unprecedented speed and precision. This article examines the multiple opportunities and challenges presented by this paradigm shift and explores its far-reaching implications. It provides valuable insights into the future trajectory of organic chemistry research, which is increasingly defined by the synergistic interaction of automation and AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00808v1-abstract-full').style.display = 'none'; document.getElementById('2312.00808v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.01056">arXiv:2311.01056</a> <span> [<a href="https://arxiv.org/pdf/2311.01056">pdf</a>, <a href="https://arxiv.org/format/2311.01056">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3616855.3635787">10.1145/3616855.3635787 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Collaboration and Transition: Distilling Item Transitions into Multi-Query Self-Attention for Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tianyu Zhu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yansong Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihong Wu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.01056v2-abstract-short" style="display: inline;"> Modern recommender systems employ various sequential modules such as self-attention to learn dynamic user interests. However, these methods are less effective in capturing collaborative and transitional signals within user interaction sequences. First, the self-attention architecture uses the embedding of a single item as the attention query, making it challenging to capture collaborative signals.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.01056v2-abstract-full').style.display = 'inline'; document.getElementById('2311.01056v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.01056v2-abstract-full" style="display: none;"> Modern recommender systems employ various sequential modules such as self-attention to learn dynamic user interests. However, these methods are less effective in capturing collaborative and transitional signals within user interaction sequences. First, the self-attention architecture uses the embedding of a single item as the attention query, making it challenging to capture collaborative signals. Second, these methods typically follow an auto-regressive framework, which is unable to learn global item transition patterns. To overcome these limitations, we propose a new method called Multi-Query Self-Attention with Transition-Aware Embedding Distillation (MQSA-TED). First, we propose an $L$-query self-attention module that employs flexible window sizes for attention queries to capture collaborative signals. In addition, we introduce a multi-query self-attention method that balances the bias-variance trade-off in modeling user preferences by combining long and short-query self-attentions. Second, we develop a transition-aware embedding distillation module that distills global item-to-item transition patterns into item embeddings, which enables the model to memorize and leverage transitional signals and serves as a calibrator for collaborative signals. Experimental results on four real-world datasets demonstrate the effectiveness of the proposed modules. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.01056v2-abstract-full').style.display = 'none'; document.getElementById('2311.01056v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WSDM 2024 Oral Presentation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13265">arXiv:2310.13265</a> <span> [<a href="https://arxiv.org/pdf/2310.13265">pdf</a>, <a href="https://arxiv.org/format/2310.13265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MoqaGPT : Zero-Shot Multi-modal Open-domain Question Answering with Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Le Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihong Wu</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+A">Aishwarya Agrawal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13265v1-abstract-short" style="display: inline;"> Multi-modal open-domain question answering typically requires evidence retrieval from databases across diverse modalities, such as images, tables, passages, etc. Even Large Language Models (LLMs) like GPT-4 fall short in this task. To enable LLMs to tackle the task in a zero-shot manner, we introduce MoqaGPT, a straightforward and flexible framework. Using a divide-and-conquer strategy that bypass… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13265v1-abstract-full').style.display = 'inline'; document.getElementById('2310.13265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13265v1-abstract-full" style="display: none;"> Multi-modal open-domain question answering typically requires evidence retrieval from databases across diverse modalities, such as images, tables, passages, etc. Even Large Language Models (LLMs) like GPT-4 fall short in this task. To enable LLMs to tackle the task in a zero-shot manner, we introduce MoqaGPT, a straightforward and flexible framework. Using a divide-and-conquer strategy that bypasses intricate multi-modality ranking, our framework can accommodate new modalities and seamlessly transition to new models for the task. Built upon LLMs, MoqaGPT retrieves and extracts answers from each modality separately, then fuses this multi-modal information using LLMs to produce a final answer. Our methodology boosts performance on the MMCoQA dataset, improving F1 by +37.91 points and EM by +34.07 points over the supervised baseline. On the MultiModalQA dataset, MoqaGPT surpasses the zero-shot baseline, improving F1 by 9.5 points and EM by 10.1 points, and significantly closes the gap with supervised methods. Our codebase is available at https://github.com/lezhang7/MOQAGPT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13265v1-abstract-full').style.display = 'none'; document.getElementById('2310.13265v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted into EMNLP2023 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.02553">arXiv:2306.02553</a> <span> [<a href="https://arxiv.org/pdf/2306.02553">pdf</a>, <a href="https://arxiv.org/format/2306.02553">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Learning to Relate to Previous Turns in Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yutao Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peng Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.02553v1-abstract-short" style="display: inline;"> Conversational search allows a user to interact with a search system in multiple turns. A query is strongly dependent on the conversation context. An effective way to improve retrieval effectiveness is to expand the current query with historical queries. However, not all the previous queries are related to, and useful for expanding the current query. In this paper, we propose a new method to selec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.02553v1-abstract-full').style.display = 'inline'; document.getElementById('2306.02553v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.02553v1-abstract-full" style="display: none;"> Conversational search allows a user to interact with a search system in multiple turns. A query is strongly dependent on the conversation context. An effective way to improve retrieval effectiveness is to expand the current query with historical queries. However, not all the previous queries are related to, and useful for expanding the current query. In this paper, we propose a new method to select relevant historical queries that are useful for the current query. To cope with the lack of labeled training data, we use a pseudo-labeling approach to annotate useful historical queries based on their impact on the retrieval results. The pseudo-labeled data are used to train a selection model. We further propose a multi-task learning framework to jointly train the selector and the retriever during fine-tuning, allowing us to mitigate the possible inconsistency between the pseudo labels and the changed retriever. Extensive experiments on four conversational search datasets demonstrate the effectiveness and broad applicability of our method compared with several strong baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.02553v1-abstract-full').style.display = 'none'; document.getElementById('2306.02553v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SIGKDD 2023 Research Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15645">arXiv:2305.15645</a> <span> [<a href="https://arxiv.org/pdf/2305.15645">pdf</a>, <a href="https://arxiv.org/format/2305.15645">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ConvGQR: Generative Query Reformulation for Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yutao Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihong Wu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15645v3-abstract-short" style="display: inline;"> In conversational search, the user's real search intent for the current turn is dependent on the previous conversation history. It is challenging to determine a good search query from the whole conversation context. To avoid the expensive re-training of the query encoder, most existing methods try to learn a rewriting model to de-contextualize the current query by mimicking the manual query rewrit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15645v3-abstract-full').style.display = 'inline'; document.getElementById('2305.15645v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15645v3-abstract-full" style="display: none;"> In conversational search, the user's real search intent for the current turn is dependent on the previous conversation history. It is challenging to determine a good search query from the whole conversation context. To avoid the expensive re-training of the query encoder, most existing methods try to learn a rewriting model to de-contextualize the current query by mimicking the manual query rewriting. However, manually rewritten queries are not always the best search queries. Training a rewriting model on them would limit the model's ability to produce good search queries. Another useful hint is the potential answer to the question. In this paper, we propose ConvGQR, a new framework to reformulate conversational queries based on generative pre-trained language models (PLMs), one for query rewriting and another for generating potential answers. By combining both, ConvGQR can produce better search queries. In addition, to relate query reformulation to retrieval performance, we propose a knowledge infusion mechanism to optimize both query reformulation and retrieval. Extensive experiments on four conversational search datasets demonstrate the effectiveness of ConvGQR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15645v3-abstract-full').style.display = 'none'; document.getElementById('2305.15645v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.06573">arXiv:2303.06573</a> <span> [<a href="https://arxiv.org/pdf/2303.06573">pdf</a>, <a href="https://arxiv.org/format/2303.06573">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Know Your Contextual Search Intent: A Prompting Framework for Conversational Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+K">Kelong Mao</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+Z">Zhicheng Dou</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+J">Jiewen Hou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haonan Chen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+H">Hongjin Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.06573v2-abstract-short" style="display: inline;"> Precisely understanding users' contextual search intent has been an important challenge for conversational search. As conversational search sessions are much more diverse and long-tailed, existing methods trained on limited data still show unsatisfactory effectiveness and robustness to handle real conversational search scenarios. Recently, large language models (LLMs) have demonstrated amazing cap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.06573v2-abstract-full').style.display = 'inline'; document.getElementById('2303.06573v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.06573v2-abstract-full" style="display: none;"> Precisely understanding users' contextual search intent has been an important challenge for conversational search. As conversational search sessions are much more diverse and long-tailed, existing methods trained on limited data still show unsatisfactory effectiveness and robustness to handle real conversational search scenarios. Recently, large language models (LLMs) have demonstrated amazing capabilities for text generation and conversation understanding. In this work, we present a simple yet effective prompting framework, called LLM4CS, to leverage LLMs as a text-based search intent interpreter to help conversational search. Under this framework, we explore three prompting methods to generate multiple query rewrites and hypothetical responses, and propose to aggregate them into an integrated representation that can robustly represent the user's real contextual search intent. Extensive automatic evaluations and human evaluations on three widely used conversational search benchmarks, including CAsT-19, CAsT-20, and CAsT-21, demonstrate the remarkable performance of our simple LLM4CS framework compared with existing methods and even using human rewrites. Our findings provide important evidence to better understand and leverage LLMs for conversational search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.06573v2-abstract-full').style.display = 'none'; document.getElementById('2303.06573v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of EMNLP 2023. Code: https://github.com/kyriemao/LLM4CS/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04175">arXiv:2211.04175</a> <span> [<a href="https://arxiv.org/pdf/2211.04175">pdf</a>, <a href="https://arxiv.org/format/2211.04175">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Efficiency in Multidevice Federated Learning through Data Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Malekzadeh%2C+M">Mohammad Malekzadeh</a>, <a href="/search/cs?searchtype=author&query=Chatterjee%2C+S">Soumyajit Chatterjee</a>, <a href="/search/cs?searchtype=author&query=Kawsar%2C+F">Fahim Kawsar</a>, <a href="/search/cs?searchtype=author&query=Mathur%2C+A">Akhil Mathur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04175v4-abstract-short" style="display: inline;"> Federated learning (FL) in multidevice environments creates new opportunities to learn from a vast and diverse amount of private data. Although personal devices capture valuable data, their memory, computing, connectivity, and battery resources are often limited. Since deep neural networks (DNNs) are the typical machine learning models employed in FL, there are demands for integrating ubiquitous c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04175v4-abstract-full').style.display = 'inline'; document.getElementById('2211.04175v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04175v4-abstract-full" style="display: none;"> Federated learning (FL) in multidevice environments creates new opportunities to learn from a vast and diverse amount of private data. Although personal devices capture valuable data, their memory, computing, connectivity, and battery resources are often limited. Since deep neural networks (DNNs) are the typical machine learning models employed in FL, there are demands for integrating ubiquitous constrained devices into the training process of DNNs. In this paper, we develop an FL framework to incorporate on-device data selection on such constrained devices, which allows partition-based training of a DNN through collaboration between constrained devices and resourceful devices of the same client. Evaluations on five benchmark DNNs and six benchmark datasets across different modalities show that, on average, our framework achieves ~19% higher accuracy and ~58% lower latency; compared to the baseline FL without our implemented strategies. We demonstrate the effectiveness of our FL framework when dealing with imbalanced data, client participation heterogeneity, and various mobility patterns. As a benchmark for the community, our code is available at https://github.com/dr-bell/data-centric-federated-learning <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04175v4-abstract-full').style.display = 'none'; document.getElementById('2211.04175v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Previous version (v3) was presented at ICLR 2023 Workshop on Machine Learning for IoT: Datasets, Perception, and Understanding</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03602">arXiv:2211.03602</a> <span> [<a href="https://arxiv.org/pdf/2211.03602">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41467-023-38853-3">10.1038/s41467-023-38853-3 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Retention Time Prediction for Chromatographic Enantioseparation by Quantile Geometry-enhanced Graph Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jinglong Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dongxiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03602v2-abstract-short" style="display: inline;"> A new research framework is proposed to incorporate machine learning techniques into the field of experimental chemistry to facilitate chromatographic enantioseparation. A documentary dataset of chiral molecular retention times (CMRT dataset) in high-performance liquid chromatography is established to handle the challenge of data acquisition. Based on the CMRT dataset, a quantile geometry-enhanced… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03602v2-abstract-full').style.display = 'inline'; document.getElementById('2211.03602v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03602v2-abstract-full" style="display: none;"> A new research framework is proposed to incorporate machine learning techniques into the field of experimental chemistry to facilitate chromatographic enantioseparation. A documentary dataset of chiral molecular retention times (CMRT dataset) in high-performance liquid chromatography is established to handle the challenge of data acquisition. Based on the CMRT dataset, a quantile geometry-enhanced graph neural network is proposed to learn the molecular structure-retention time relationship, which shows a satisfactory predictive ability for enantiomers. The domain knowledge of chromatography is incorporated into the machine learning model to achieve multi-column prediction, which paves the way for chromatographic enantioseparation prediction by calculating the separation probability. Experiments confirm that the proposed research framework works well in retention time prediction and chromatographic enantioseparation facilitation, which sheds light on the application of machine learning techniques to the experimental scene and improves the efficiency of experimenters to speed up scientific discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03602v2-abstract-full').style.display = 'none'; document.getElementById('2211.03602v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.10134">arXiv:2208.10134</a> <span> [<a href="https://arxiv.org/pdf/2208.10134">pdf</a>, <a href="https://arxiv.org/format/2208.10134">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3670007">10.1145/3670007 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Machine Learning with Confidential Computing: A Systematization of Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Tarkhani%2C+Z">Zahra Tarkhani</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.10134v3-abstract-short" style="display: inline;"> Privacy and security challenges in Machine Learning (ML) have become increasingly severe, along with ML's pervasive development and the recent demonstration of large attack surfaces. As a mature system-oriented approach, Confidential Computing has been utilized in both academia and industry to mitigate privacy and security issues in various ML scenarios. In this paper, the conjunction between ML a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10134v3-abstract-full').style.display = 'inline'; document.getElementById('2208.10134v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.10134v3-abstract-full" style="display: none;"> Privacy and security challenges in Machine Learning (ML) have become increasingly severe, along with ML's pervasive development and the recent demonstration of large attack surfaces. As a mature system-oriented approach, Confidential Computing has been utilized in both academia and industry to mitigate privacy and security issues in various ML scenarios. In this paper, the conjunction between ML and Confidential Computing is investigated. We systematize the prior work on Confidential Computing-assisted ML techniques that provide i) confidentiality guarantees and ii) integrity assurances, and discuss their advanced features and drawbacks. Key challenges are further identified, and we provide dedicated analyses of the limitations in existing Trusted Execution Environment (TEE) systems for ML use cases. Finally, prospective works are discussed, including grounded privacy definitions for closed-loop protection, partitioned executions of efficient ML, dedicated TEE-assisted designs for ML, TEE-aware ML, and ML full pipeline guarantees. By providing these potential solutions in our systematization of knowledge, we aim to build the bridge to help achieve a much stronger TEE-enabled ML for privacy guarantees without introducing computation and system costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10134v3-abstract-full').style.display = 'none'; document.getElementById('2208.10134v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Survey paper, 37 pages, accepted to ACM Computing Surveys</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.01193">arXiv:2207.01193</a> <span> [<a href="https://arxiv.org/pdf/2207.01193">pdf</a>, <a href="https://arxiv.org/format/2207.01193">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2023.findings-acl.355">10.18653/v1/2023.findings-acl.355 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Customized Text Sanitization Mechanism with Differential Privacy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huimin Chen</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yanhao Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Cen Chen</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengyu Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+J">Jamie Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.01193v2-abstract-short" style="display: inline;"> As privacy issues are receiving increasing attention within the Natural Language Processing (NLP) community, numerous methods have been proposed to sanitize texts subject to differential privacy. However, the state-of-the-art text sanitization mechanisms based on metric local differential privacy (MLDP) do not apply to non-metric semantic similarity measures and cannot achieve good trade-offs betw… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.01193v2-abstract-full').style.display = 'inline'; document.getElementById('2207.01193v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.01193v2-abstract-full" style="display: none;"> As privacy issues are receiving increasing attention within the Natural Language Processing (NLP) community, numerous methods have been proposed to sanitize texts subject to differential privacy. However, the state-of-the-art text sanitization mechanisms based on metric local differential privacy (MLDP) do not apply to non-metric semantic similarity measures and cannot achieve good trade-offs between privacy and utility. To address the above limitations, we propose a novel Customized Text (CusText) sanitization mechanism based on the original $蔚$-differential privacy (DP) definition, which is compatible with any similarity measure. Furthermore, CusText assigns each input token a customized output set of tokens to provide more advanced privacy protection at the token level. Extensive experiments on several benchmark datasets show that CusText achieves a better trade-off between privacy and utility than existing mechanisms. The code is available at https://github.com/sai4july/CusText. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.01193v2-abstract-full').style.display = 'none'; document.getElementById('2207.01193v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been accepted to the Findings of ACL 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> https://aclanthology.org/2023.findings-acl.355/ </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.03322">arXiv:2205.03322</a> <span> [<a href="https://arxiv.org/pdf/2205.03322">pdf</a>, <a href="https://arxiv.org/ps/2205.03322">ps</a>, <a href="https://arxiv.org/format/2205.03322">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Operating Systems">cs.OS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Private delegated computations using strong isolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Brossard%2C+M">Mathias Brossard</a>, <a href="/search/cs?searchtype=author&query=Bryant%2C+G">Guilhem Bryant</a>, <a href="/search/cs?searchtype=author&query=Gaabouri%2C+B+E">Basma El Gaabouri</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xinxin Fan</a>, <a href="/search/cs?searchtype=author&query=Ferreira%2C+A">Alexandre Ferreira</a>, <a href="/search/cs?searchtype=author&query=Grimley-Evans%2C+E">Edmund Grimley-Evans</a>, <a href="/search/cs?searchtype=author&query=Haster%2C+C">Christopher Haster</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+E">Evan Johnson</a>, <a href="/search/cs?searchtype=author&query=Miller%2C+D">Derek Miller</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Mulligan%2C+D+P">Dominic P. Mulligan</a>, <a href="/search/cs?searchtype=author&query=Spinale%2C+N">Nick Spinale</a>, <a href="/search/cs?searchtype=author&query=van+Hensbergen%2C+E">Eric van Hensbergen</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+H+J+M">Hugo J. M. Vincent</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+S">Shale Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.03322v1-abstract-short" style="display: inline;"> Sensitive computations are now routinely delegated to third-parties. In response, Confidential Computing technologies are being introduced to microprocessors, offering a protected processing environment, which we generically call an isolate, providing confidentiality and integrity guarantees to code and data hosted within -- even in the face of a privileged attacker. Isolates, with an attestation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03322v1-abstract-full').style.display = 'inline'; document.getElementById('2205.03322v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.03322v1-abstract-full" style="display: none;"> Sensitive computations are now routinely delegated to third-parties. In response, Confidential Computing technologies are being introduced to microprocessors, offering a protected processing environment, which we generically call an isolate, providing confidentiality and integrity guarantees to code and data hosted within -- even in the face of a privileged attacker. Isolates, with an attestation protocol, permit remote third-parties to establish a trusted "beachhead" containing known code and data on an otherwise untrusted machine. Yet, the rise of these technologies introduces many new problems, including: how to ease provisioning of computations safely into isolates; how to develop distributed systems spanning multiple classes of isolate; and what to do about the billions of "legacy" devices without support for Confidential Computing? Tackling the problems above, we introduce Veracruz, a framework that eases the design and implementation of complex privacy-preserving, collaborative, delegated computations among a group of mutually mistrusting principals. Veracruz supports multiple isolation technologies and provides a common programming model and attestation protocol across all of them, smoothing deployment of delegated computations over supported technologies. We demonstrate Veracruz in operation, on private in-cloud object detection on encrypted video streaming from a video camera. In addition to supporting hardware-backed isolates -- like AWS Nitro Enclaves and Arm Confidential Computing Architecture Realms -- Veracruz also provides pragmatic "software isolates" on Armv8-A devices without hardware Confidential Computing capability, using the high-assurance seL4 microkernel and our IceCap framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03322v1-abstract-full').style.display = 'none'; document.getElementById('2205.03322v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.08174">arXiv:2202.08174</a> <span> [<a href="https://arxiv.org/pdf/2202.08174">pdf</a>, <a href="https://arxiv.org/format/2202.08174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3508396.3512877">10.1145/3508396.3512877 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Towards Battery-Free Machine Learning and Inference in Underwater Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yuchen Zhao</a>, <a href="/search/cs?searchtype=author&query=Afzal%2C+S+S">Sayed Saad Afzal</a>, <a href="/search/cs?searchtype=author&query=Akbar%2C+W">Waleed Akbar</a>, <a href="/search/cs?searchtype=author&query=Rodriguez%2C+O">Osvy Rodriguez</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Boyle%2C+D">David Boyle</a>, <a href="/search/cs?searchtype=author&query=Adib%2C+F">Fadel Adib</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.08174v1-abstract-short" style="display: inline;"> This paper is motivated by a simple question: Can we design and build battery-free devices capable of machine learning and inference in underwater environments? An affirmative answer to this question would have significant implications for a new generation of underwater sensing and monitoring applications for environmental monitoring, scientific exploration, and climate/weather prediction. To an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.08174v1-abstract-full').style.display = 'inline'; document.getElementById('2202.08174v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.08174v1-abstract-full" style="display: none;"> This paper is motivated by a simple question: Can we design and build battery-free devices capable of machine learning and inference in underwater environments? An affirmative answer to this question would have significant implications for a new generation of underwater sensing and monitoring applications for environmental monitoring, scientific exploration, and climate/weather prediction. To answer this question, we explore the feasibility of bridging advances from the past decade in two fields: battery-free networking and low-power machine learning. Our exploration demonstrates that it is indeed possible to enable battery-free inference in underwater environments. We designed a device that can harvest energy from underwater sound, power up an ultra-low-power microcontroller and on-board sensor, perform local inference on sensed measurements using a lightweight Deep Neural Network, and communicate the inference result via backscatter to a receiver. We tested our prototype in an emulated marine bioacoustics application, demonstrating the potential to recognize underwater animal sounds without batteries. Through this exploration, we highlight the challenges and opportunities for making underwater battery-free inference and machine learning ubiquitous. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.08174v1-abstract-full').style.display = 'none'; document.getElementById('2202.08174v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, HotMobile '22, March 9-10, 2022, Tempe, AZ, USA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.05962">arXiv:2202.05962</a> <span> [<a href="https://arxiv.org/pdf/2202.05962">pdf</a>, <a href="https://arxiv.org/format/2202.05962">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.chempr.2022.08.008">10.1016/j.chempr.2022.08.008 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> High-throughput discovery of chemical structure-polarity relationships combining automation and machine learning techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jinglong Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qianyi Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuntian Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianning Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Young%2C+M+C">Michael C. Young</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yan Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dongxiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fanyang Mo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.05962v1-abstract-short" style="display: inline;"> As an essential attribute of organic compounds, polarity has a profound influence on many molecular properties such as solubility and phase transition temperature. Thin layer chromatography (TLC) represents a commonly used technique for polarity measurement. However, current TLC analysis presents several problems, including the need for a large number of attempts to obtain suitable conditions, as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.05962v1-abstract-full').style.display = 'inline'; document.getElementById('2202.05962v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.05962v1-abstract-full" style="display: none;"> As an essential attribute of organic compounds, polarity has a profound influence on many molecular properties such as solubility and phase transition temperature. Thin layer chromatography (TLC) represents a commonly used technique for polarity measurement. However, current TLC analysis presents several problems, including the need for a large number of attempts to obtain suitable conditions, as well as irreproducibility due to non-standardization. Herein, we describe an automated experiment system for TLC analysis. This system is designed to conduct TLC analysis automatically, facilitating high-throughput experimentation by collecting large experimental data under standardized conditions. Using these datasets, machine learning (ML) methods are employed to construct surrogate models correlating organic compounds' structures and their polarity using retardation factor (Rf). The trained ML models are able to predict the Rf value curve of organic compounds with high accuracy. Furthermore, the constitutive relationship between the compound and its polarity can also be discovered through these modeling methods, and the underlying mechanism is rationalized through adsorption theories. The trained ML models not only reduce the need for empirical optimization currently required for TLC analysis, but also provide general guidelines for the selection of conditions, making TLC an easily accessible tool for the broad scientific community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.05962v1-abstract-full').style.display = 'none'; document.getElementById('2202.05962v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Chem 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.13929">arXiv:2105.13929</a> <span> [<a href="https://arxiv.org/pdf/2105.13929">pdf</a>, <a href="https://arxiv.org/format/2105.13929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Quantifying and Localizing Usable Information Leakage from Neural Network Gradients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Borovykh%2C+A">Anastasia Borovykh</a>, <a href="/search/cs?searchtype=author&query=Malekzadeh%2C+M">Mohammad Malekzadeh</a>, <a href="/search/cs?searchtype=author&query=Demetriou%2C+S">Soteris Demetriou</a>, <a href="/search/cs?searchtype=author&query=G%C3%BCnd%C3%BCz%2C+D">Deniz G眉nd眉z</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.13929v3-abstract-short" style="display: inline;"> In collaborative learning, clients keep their data private and communicate only the computed gradients of the deep neural network being trained on their local data. Several recent attacks show that one can still extract private information from the shared network's gradients compromising clients' privacy. In this paper, to quantify the private information leakage from gradients we adopt usable inf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.13929v3-abstract-full').style.display = 'inline'; document.getElementById('2105.13929v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.13929v3-abstract-full" style="display: none;"> In collaborative learning, clients keep their data private and communicate only the computed gradients of the deep neural network being trained on their local data. Several recent attacks show that one can still extract private information from the shared network's gradients compromising clients' privacy. In this paper, to quantify the private information leakage from gradients we adopt usable information theory. We focus on two types of private information: original information in data reconstruction attacks and latent information in attribute inference attacks. Furthermore, a sensitivity analysis over the gradients is performed to explore the underlying cause of information leakage and validate the results of the proposed framework. Finally, we conduct numerical evaluations on six benchmark datasets and four well-known deep models. We measure the impact of training hyperparameters, e.g., batches and epochs, as well as potential defense mechanisms, e.g., dropout and differential privacy. Our proposed framework enables clients to localize and quantify the private information leakage in a layer-wise manner, and enables a better understanding of the sources of information leakage in collaborative learning, which can be used by future studies to benchmark new attacks and defense mechanisms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.13929v3-abstract-full').style.display = 'none'; document.getElementById('2105.13929v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.14380">arXiv:2104.14380</a> <span> [<a href="https://arxiv.org/pdf/2104.14380">pdf</a>, <a href="https://arxiv.org/format/2104.14380">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PPFL: Privacy-preserving Federated Learning with Trusted Execution Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a>, <a href="/search/cs?searchtype=author&query=Katevas%2C+K">Kleomenis Katevas</a>, <a href="/search/cs?searchtype=author&query=Marin%2C+E">Eduard Marin</a>, <a href="/search/cs?searchtype=author&query=Perino%2C+D">Diego Perino</a>, <a href="/search/cs?searchtype=author&query=Kourtellis%2C+N">Nicolas Kourtellis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.14380v2-abstract-short" style="display: inline;"> We propose and implement a Privacy-preserving Federated Learning ($PPFL$) framework for mobile systems to limit privacy leakages in federated learning. Leveraging the widespread presence of Trusted Execution Environments (TEEs) in high-end and mobile devices, we utilize TEEs on clients for local training, and on servers for secure aggregation, so that model/gradient updates are hidden from adversa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.14380v2-abstract-full').style.display = 'inline'; document.getElementById('2104.14380v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.14380v2-abstract-full" style="display: none;"> We propose and implement a Privacy-preserving Federated Learning ($PPFL$) framework for mobile systems to limit privacy leakages in federated learning. Leveraging the widespread presence of Trusted Execution Environments (TEEs) in high-end and mobile devices, we utilize TEEs on clients for local training, and on servers for secure aggregation, so that model/gradient updates are hidden from adversaries. Challenged by the limited memory size of current TEEs, we leverage greedy layer-wise training to train each model's layer inside the trusted area until its convergence. The performance evaluation of our implementation shows that $PPFL$ can significantly improve privacy while incurring small system overheads at the client-side. In particular, $PPFL$ can successfully defend the trained model against data reconstruction, property inference, and membership inference attacks. Furthermore, it can achieve comparable model utility with fewer communication rounds (0.54$\times$) and a similar amount of network traffic (1.002$\times$) compared to the standard federated learning of a complete model. This is achieved while only introducing up to ~15% CPU time, ~18% memory usage, and ~21% energy consumption overhead in $PPFL$'s client-side. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.14380v2-abstract-full').style.display = 'none'; document.getElementById('2104.14380v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 8 figures, accepted to MobiSys 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.08762">arXiv:2010.08762</a> <span> [<a href="https://arxiv.org/pdf/2010.08762">pdf</a>, <a href="https://arxiv.org/format/2010.08762">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Layer-wise Characterization of Latent Information Leakage in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Borovykh%2C+A">Anastasia Borovykh</a>, <a href="/search/cs?searchtype=author&query=Malekzadeh%2C+M">Mohammad Malekzadeh</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a>, <a href="/search/cs?searchtype=author&query=Demetriou%2C+S">Soteris Demetriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.08762v4-abstract-short" style="display: inline;"> Training deep neural networks via federated learning allows clients to share, instead of the original data, only the model trained on their data. Prior work has demonstrated that in practice a client's private information, unrelated to the main learning task, can be discovered from the model's gradients, which compromises the promised privacy protection. However, there is still no formal approach… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.08762v4-abstract-full').style.display = 'inline'; document.getElementById('2010.08762v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.08762v4-abstract-full" style="display: none;"> Training deep neural networks via federated learning allows clients to share, instead of the original data, only the model trained on their data. Prior work has demonstrated that in practice a client's private information, unrelated to the main learning task, can be discovered from the model's gradients, which compromises the promised privacy protection. However, there is still no formal approach for quantifying the leakage of private information via the shared updated model or gradients. In this work, we analyze property inference attacks and define two metrics based on (i) an adaptation of the empirical $\mathcal{V}$-information, and (ii) a sensitivity analysis using Jacobian matrices allowing us to measure changes in the gradients with respect to latent information. We show the applicability of our proposed metrics in localizing private latent information in a layer-wise manner and in two settings where (i) we have or (ii) we do not have knowledge of the attackers' capabilities. We evaluate the proposed metrics for quantifying information leakage on three real-world datasets using three benchmark models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.08762v4-abstract-full').style.display = 'none'; document.getElementById('2010.08762v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, at ICLR workshop (Distributed and Private Machine Learning)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.05703">arXiv:2004.05703</a> <span> [<a href="https://arxiv.org/pdf/2004.05703">pdf</a>, <a href="https://arxiv.org/format/2004.05703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3386901.3388946">10.1145/3386901.3388946 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DarkneTZ: Towards Model Privacy at the Edge using Trusted Execution Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mo%2C+F">Fan Mo</a>, <a href="/search/cs?searchtype=author&query=Shamsabadi%2C+A+S">Ali Shahin Shamsabadi</a>, <a href="/search/cs?searchtype=author&query=Katevas%2C+K">Kleomenis Katevas</a>, <a href="/search/cs?searchtype=author&query=Demetriou%2C+S">Soteris Demetriou</a>, <a href="/search/cs?searchtype=author&query=Leontiadis%2C+I">Ilias Leontiadis</a>, <a href="/search/cs?searchtype=author&query=Cavallaro%2C+A">Andrea Cavallaro</a>, <a href="/search/cs?searchtype=author&query=Haddadi%2C+H">Hamed Haddadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.05703v1-abstract-short" style="display: inline;"> We present DarkneTZ, a framework that uses an edge device's Trusted Execution Environment (TEE) in conjunction with model partitioning to limit the attack surface against Deep Neural Networks (DNNs). Increasingly, edge devices (smartphones and consumer IoT devices) are equipped with pre-trained DNNs for a variety of applications. This trend comes with privacy risks as models can leak information a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.05703v1-abstract-full').style.display = 'inline'; document.getElementById('2004.05703v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.05703v1-abstract-full" style="display: none;"> We present DarkneTZ, a framework that uses an edge device's Trusted Execution Environment (TEE) in conjunction with model partitioning to limit the attack surface against Deep Neural Networks (DNNs). Increasingly, edge devices (smartphones and consumer IoT devices) are equipped with pre-trained DNNs for a variety of applications. This trend comes with privacy risks as models can leak information about their training data through effective membership inference attacks (MIAs). We evaluate the performance of DarkneTZ, including CPU execution time, memory usage, and accurate power consumption, using two small and six large image classification models. Due to the limited memory of the edge device's TEE, we partition model layers into more sensitive layers (to be executed inside the device TEE), and a set of layers to be executed in the untrusted part of the operating system. Our results show that even if a single layer is hidden, we can provide reliable model privacy and defend against state of the art MIAs, with only 3% performance overhead. When fully utilizing the TEE, DarkneTZ provides model protections with up to 10% overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.05703v1-abstract-full').style.display = 'none'; document.getElementById('2004.05703v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 8 figures, accepted to ACM MobiSys 2020</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Mo%2C+F&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Mo%2C+F&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Mo%2C+F&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository