CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 80 results for author: <span class="mathjax">Minervini, P</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Minervini%2C+P">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Minervini, P"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Minervini%2C+P&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Minervini, P"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05867">arXiv:2502.05867</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05867">pdf</a>, <a href="https://arxiv.org/format/2502.05867">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Training Large Language Models for Tool-Use Without Demonstrations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+N">Ne Luo</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=van+Krieken%2C+E">Emile van Krieken</a>, <a href="/search/cs?searchtype=author&amp;query=Lesci%2C+P">Pietro Lesci</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05867v1-abstract-short" style="display: inline;"> Large language models (LLMs) remain prone to factual inaccuracies and computational errors, including hallucinations and mistakes in mathematical reasoning. Recent work augmented LLMs with tools to mitigate these shortcomings, but often requires curated gold tool-use demonstrations. In this paper, we investigate whether LLMs can learn to use tools without demonstrations. First, we analyse zero-sho&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05867v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05867v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05867v1-abstract-full" style="display: none;"> Large language models (LLMs) remain prone to factual inaccuracies and computational errors, including hallucinations and mistakes in mathematical reasoning. Recent work augmented LLMs with tools to mitigate these shortcomings, but often requires curated gold tool-use demonstrations. In this paper, we investigate whether LLMs can learn to use tools without demonstrations. First, we analyse zero-shot prompting strategies to guide LLMs in tool utilisation. Second, we propose a self-training method to synthesise tool-use traces using the LLM itself. 
We compare supervised fine-tuning and preference fine-tuning techniques for fine-tuning the model on datasets constructed using existing Question Answering (QA) datasets, i.e., TriviaQA and GSM8K. Experiments show that tool-use enhances performance on a long-tail knowledge task: 3.7% on PopQA, which is used solely for evaluation, but leads to mixed results on other datasets, i.e., TriviaQA, GSM8K, and NQ-Open. Our findings highlight the potential and challenges of integrating external tools into LLMs without demonstrations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05867v1-abstract-full').style.display = 'none'; document.getElementById('2502.05867v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05092">arXiv:2502.05092</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05092">pdf</a>, <a href="https://arxiv.org/format/2502.05092">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Lost in Time: Clock and Calendar Understanding Challenges in Multimodal LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+R">Rohit Saxena</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05092v1-abstract-short" style="display: inline;"> Understanding time from visual representations is a fundamental cognitive skill, yet it remains a challenge for multimodal large language models (MLLMs). In this work, we investigate the capabilities of MLLMs in interpreting time and date through analogue clocks and yearly calendars. To facilitate this, we curated a structured dataset comprising two subsets: 1) $\textit{ClockQA}$, which comprises&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05092v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05092v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05092v1-abstract-full" style="display: none;"> Understanding time from visual representations is a fundamental cognitive skill, yet it remains a challenge for multimodal large language models (MLLMs). In this work, we investigate the capabilities of MLLMs in interpreting time and date through analogue clocks and yearly calendars. To facilitate this, we curated a structured dataset comprising two subsets: 1) $\textit{ClockQA}$, which comprises various types of clock styles$-$standard, black-dial, no-second-hand, Roman numeral, and arrow-hand clocks$-$paired with time related questions; and 2) $\textit{CalendarQA}$, which consists of yearly calendar images with questions ranging from commonly known dates (e.g., Christmas, New Year&#39;s Day) to computationally derived ones (e.g., the 100th or 153rd day of the year). 
We aim to analyse how MLLMs can perform visual recognition, numerical reasoning, and temporal inference when presented with time-related visual data. Our evaluations show that despite recent advancements, reliably understanding time remains a significant challenge for MLLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05092v1-abstract-full').style.display = 'none'; document.getElementById('2502.05092v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12275">arXiv:2501.12275</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.12275">pdf</a>, <a href="https://arxiv.org/format/2501.12275">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> With Great Backbones Comes Great Adversarial Transferability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Arakelyan%2C+E">Erik Arakelyan</a>, <a href="/search/cs?searchtype=author&amp;query=Hambardzumyan%2C+K">Karen Hambardzumyan</a>, <a href="/search/cs?searchtype=author&amp;query=Papikyan%2C+D">Davit Papikyan</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Gordo%2C+A">Albert Gordo</a>, <a href="/search/cs?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a>, <a href="/search/cs?searchtype=author&amp;query=Markosyan%2C+A+H">Aram H. Markosyan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12275v1-abstract-short" style="display: inline;"> Advances in self-supervised learning (SSL) for machine vision have improved representation robustness and model performance, giving rise to pre-trained backbones like \emph{ResNet} and \emph{ViT} models tuned with SSL methods such as \emph{SimCLR}. Due to the computational and data demands of pre-training, the utilization of such backbones becomes a strenuous necessity. However, employing these ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12275v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12275v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12275v1-abstract-full" style="display: none;"> Advances in self-supervised learning (SSL) for machine vision have improved representation robustness and model performance, giving rise to pre-trained backbones like \emph{ResNet} and \emph{ViT} models tuned with SSL methods such as \emph{SimCLR}. Due to the computational and data demands of pre-training, the utilization of such backbones becomes a strenuous necessity. 
However, employing these backbones may inherit vulnerabilities to adversarial attacks. While adversarial robustness has been studied under \emph{white-box} and \emph{black-box} settings, the robustness of models tuned on pre-trained backbones remains largely unexplored. Additionally, the role of tuning meta-information in mitigating exploitation risks is unclear. This work systematically evaluates the adversarial robustness of such models across $20,000$ combinations of tuning meta-information, including fine-tuning techniques, backbone families, datasets, and attack types. We propose using proxy models to transfer attacks, simulating varying levels of target knowledge by fine-tuning these proxies with diverse configurations. Our findings reveal that proxy-based attacks approach the effectiveness of \emph{white-box} methods, even with minimal tuning knowledge. We also introduce a naive &#34;backbone attack,&#34; leveraging only the backbone to generate adversarial samples, which outperforms \emph{black-box} attacks and rivals \emph{white-box} methods, highlighting critical risks in model-sharing practices. Finally, our ablations reveal how increasing tuning meta-information impacts attack transferability, measuring each meta-information combination. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12275v1-abstract-full').style.display = 'none'; document.getElementById('2501.12275v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16475">arXiv:2412.16475</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16475">pdf</a>, <a href="https://arxiv.org/format/2412.16475">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> When Can Proxies Improve the Sample Complexity of Preference Learning? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yuchen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=de+Souza%2C+D+A">Daniel Augusto de Souza</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Z">Zhengyan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mengyue Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=D%27Amour%2C+A">Alexander D&#39;Amour</a>, <a href="/search/cs?searchtype=author&amp;query=Kusner%2C+M+J">Matt J. Kusner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16475v1-abstract-short" style="display: inline;"> We address the problem of reward hacking, where maximising a proxy reward does not necessarily increase the true reward. This is a key concern for Large Language Models (LLMs), as they are often fine-tuned on human preferences that may not accurately reflect a true objective. 
Existing work uses various tricks such as regularisation, tweaks to the reward model, and reward hacking detectors, to limi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16475v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16475v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16475v1-abstract-full" style="display: none;"> We address the problem of reward hacking, where maximising a proxy reward does not necessarily increase the true reward. This is a key concern for Large Language Models (LLMs), as they are often fine-tuned on human preferences that may not accurately reflect a true objective. Existing work uses various tricks such as regularisation, tweaks to the reward model, and reward hacking detectors, to limit the influence that such proxy preferences have on a model. Luckily, in many contexts such as medicine, education, and law, a sparse amount of expert data is often available. In these cases, it is often unclear whether the addition of proxy data can improve policy learning. We outline a set of sufficient conditions on proxy feedback that, if satisfied, indicate that proxy data can provably improve the sample complexity of learning the ground truth policy. These conditions can inform the data collection process for specific tasks. The result implies a parameterisation for LLMs that achieves this improved sample complexity. We detail how one can adapt existing architectures to yield this improved sample complexity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16475v1-abstract-full').style.display = 'none'; document.getElementById('2412.16475v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15626">arXiv:2411.15626</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.15626">pdf</a>, <a href="https://arxiv.org/format/2411.15626">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Aligning Generalisation Between Humans and Machines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ilievski%2C+F">Filip Ilievski</a>, <a href="/search/cs?searchtype=author&amp;query=Hammer%2C+B">Barbara Hammer</a>, <a href="/search/cs?searchtype=author&amp;query=van+Harmelen%2C+F">Frank van Harmelen</a>, <a href="/search/cs?searchtype=author&amp;query=Paassen%2C+B">Benjamin Paassen</a>, <a href="/search/cs?searchtype=author&amp;query=Saralajew%2C+S">Sascha Saralajew</a>, <a href="/search/cs?searchtype=author&amp;query=Schmid%2C+U">Ute Schmid</a>, <a href="/search/cs?searchtype=author&amp;query=Biehl%2C+M">Michael Biehl</a>, <a href="/search/cs?searchtype=author&amp;query=Bolognesi%2C+M">Marianna Bolognesi</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+X+L">Xin Luna Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Gashteovski%2C+K">Kiril Gashteovski</a>, <a href="/search/cs?searchtype=author&amp;query=Hitzler%2C+P">Pascal Hitzler</a>, 
<a href="/search/cs?searchtype=author&amp;query=Marra%2C+G">Giuseppe Marra</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Mundt%2C+M">Martin Mundt</a>, <a href="/search/cs?searchtype=author&amp;query=Ngomo%2C+A+N">Axel-Cyrille Ngonga Ngomo</a>, <a href="/search/cs?searchtype=author&amp;query=Oltramari%2C+A">Alessandro Oltramari</a>, <a href="/search/cs?searchtype=author&amp;query=Pasi%2C+G">Gabriella Pasi</a>, <a href="/search/cs?searchtype=author&amp;query=Saribatur%2C+Z+G">Zeynep G. Saribatur</a>, <a href="/search/cs?searchtype=author&amp;query=Serafini%2C+L">Luciano Serafini</a>, <a href="/search/cs?searchtype=author&amp;query=Shawe-Taylor%2C+J">John Shawe-Taylor</a>, <a href="/search/cs?searchtype=author&amp;query=Shwartz%2C+V">Vered Shwartz</a>, <a href="/search/cs?searchtype=author&amp;query=Skitalinskaya%2C+G">Gabriella Skitalinskaya</a>, <a href="/search/cs?searchtype=author&amp;query=Stachl%2C+C">Clemens Stachl</a>, <a href="/search/cs?searchtype=author&amp;query=van+de+Ven%2C+G+M">Gido M. van de Ven</a>, <a href="/search/cs?searchtype=author&amp;query=Villmann%2C+T">Thomas Villmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15626v1-abstract-short" style="display: inline;"> Recent advances in AI -- including generative approaches -- have resulted in technology that can support humans in scientific discovery and decision support but may also disrupt democracies and target individuals. The responsible use of AI increasingly shows the need for human-AI teaming, necessitating effective interaction between humans and machines. 
A crucial yet often overlooked aspect of thes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15626v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15626v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15626v1-abstract-full" style="display: none;"> Recent advances in AI -- including generative approaches -- have resulted in technology that can support humans in scientific discovery and decision support but may also disrupt democracies and target individuals. The responsible use of AI increasingly shows the need for human-AI teaming, necessitating effective interaction between humans and machines. A crucial yet often overlooked aspect of these interactions is the different ways in which humans and machines generalise. In cognitive science, human generalisation commonly involves abstraction and concept learning. In contrast, AI generalisation encompasses out-of-domain generalisation in machine learning, rule-based reasoning in symbolic AI, and abstraction in neuro-symbolic AI. In this perspective paper, we combine insights from AI and cognitive science to identify key commonalities and differences across three dimensions: notions of generalisation, methods for generalisation, and evaluation of generalisation. We map the different conceptualisations of generalisation in AI and cognitive science along these three dimensions and consider their role in human-AI teaming. This results in interdisciplinary challenges across AI and cognitive science that must be tackled to provide a foundation for effective and cognitively supported alignment in human-AI teaming scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15626v1-abstract-full').style.display = 'none'; document.getElementById('2411.15626v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02830">arXiv:2411.02830</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.02830">pdf</a>, <a href="https://arxiv.org/format/2411.02830">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mixtures of In-Context Learners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=van+Krieken%2C+E">Emile van Krieken</a>, <a href="/search/cs?searchtype=author&amp;query=Ponti%2C+E">Edoardo Ponti</a>, <a href="/search/cs?searchtype=author&amp;query=Malkin%2C+N">Nikolay Malkin</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02830v1-abstract-short" style="display: inline;"> In-context learning (ICL) adapts LLMs by providing demonstrations without fine-tuning the 
model parameters; however, it does not differentiate between demonstrations and quadratically increases the complexity of Transformer LLMs, exhausting the memory. As a solution, we propose Mixtures of In-Context Learners (MoICL), a novel approach to treat subsets of demonstrations as experts and learn a weigh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02830v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02830v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02830v1-abstract-full" style="display: none;"> In-context learning (ICL) adapts LLMs by providing demonstrations without fine-tuning the model parameters; however, it does not differentiate between demonstrations and quadratically increases the complexity of Transformer LLMs, exhausting the memory. As a solution, we propose Mixtures of In-Context Learners (MoICL), a novel approach to treat subsets of demonstrations as experts and learn a weighting function to merge their output distributions based on a training set. In our experiments, we show performance improvements on 5 out of 7 classification datasets compared to a set of strong baselines (up to +13\% compared to ICL and LENS). Moreover, we enhance the Pareto frontier of ICL by reducing the inference time needed to achieve the same performance with fewer demonstrations. Finally, MoICL is more robust to out-of-domain (up to +11\%), imbalanced (up to +49\%), or noisy demonstrations (up to +38\%) or can filter these out from datasets. Overall, MoICL is a more expressive approach to learning from demonstrations without exhausting the context window or memory. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02830v1-abstract-full').style.display = 'none'; document.getElementById('2411.02830v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19406">arXiv:2410.19406</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19406">pdf</a>, <a href="https://arxiv.org/format/2410.19406">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Auditing Test To Detect Behavioral Shift in Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Richter%2C+L">Leo Richter</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Kusner%2C+M+J">Matt J. Kusner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19406v1-abstract-short" style="display: inline;"> As language models (LMs) approach human-level performance, a comprehensive understanding of their behavior becomes crucial. This includes evaluating capabilities, biases, task performance, and alignment with societal values. Extensive initial evaluations, including red teaming and diverse benchmarking, can establish a model&#39;s behavioral profile. 
However, subsequent fine-tuning or deployment modifi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19406v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19406v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19406v1-abstract-full" style="display: none;"> As language models (LMs) approach human-level performance, a comprehensive understanding of their behavior becomes crucial. This includes evaluating capabilities, biases, task performance, and alignment with societal values. Extensive initial evaluations, including red teaming and diverse benchmarking, can establish a model&#39;s behavioral profile. However, subsequent fine-tuning or deployment modifications may alter these behaviors in unintended ways. We present a method for continual Behavioral Shift Auditing (BSA) in LMs. Building on recent work in hypothesis testing, our auditing test detects behavioral shifts solely through model generations. Our test compares model generations from a baseline model to those of the model under scrutiny and provides theoretical guarantees for change detection while controlling false positives. The test features a configurable tolerance parameter that adjusts sensitivity to behavioral changes for different use cases. We evaluate our approach using two case studies: monitoring changes in (a) toxicity and (b) translation performance. We find that the test is able to detect meaningful changes in behavior distributions using just hundreds of examples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19406v1-abstract-full').style.display = 'none'; document.getElementById('2410.19406v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18860">arXiv:2410.18860</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18860">pdf</a>, <a href="https://arxiv.org/format/2410.18860">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DeCoRe: Decoding by Contrasting Retrieval Heads to Mitigate Hallucinations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+C">Chen Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Abdulaal%2C+A">Ahmed Abdulaal</a>, <a href="/search/cs?searchtype=author&amp;query=Diethe%2C+T">Tom Diethe</a>, <a href="/search/cs?searchtype=author&amp;query=Teare%2C+P">Philip Teare</a>, <a href="/search/cs?searchtype=author&amp;query=Alex%2C+B">Beatrice Alex</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a 
href="/search/cs?searchtype=author&amp;query=Saseendran%2C+A">Amrutha Saseendran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18860v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) often hallucinate, producing unfaithful or factually incorrect outputs by misrepresenting the provided context or incorrectly recalling internal knowledge. Recent studies have identified specific attention heads within the Transformer architecture, known as retrieval heads, responsible for extracting relevant contextual information. We hypothesise that masking these re&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18860v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18860v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18860v1-abstract-full" style="display: none;"> Large Language Models (LLMs) often hallucinate, producing unfaithful or factually incorrect outputs by misrepresenting the provided context or incorrectly recalling internal knowledge. Recent studies have identified specific attention heads within the Transformer architecture, known as retrieval heads, responsible for extracting relevant contextual information. We hypothesise that masking these retrieval heads can induce hallucinations and that contrasting the outputs of the base LLM and the masked LLM can reduce hallucinations. To this end, we propose Decoding by Contrasting Retrieval Heads (DeCoRe), a novel training-free decoding strategy that amplifies information found in the context and model parameters. DeCoRe mitigates potentially hallucinated responses by dynamically contrasting the outputs of the base LLM and the masked LLM, using conditional entropy as a guide. 
Our extensive experiments confirm that DeCoRe significantly improves performance on tasks requiring high contextual faithfulness, such as summarisation (XSum by 18.6%), instruction following (MemoTrap by 10.9%), and open-book question answering (NQ-Open by 2.4% and NQ-Swap by 5.5%). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18860v1-abstract-full').style.display = 'none'; document.getElementById('2410.18860v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16090">arXiv:2410.16090</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16090">pdf</a>, <a href="https://arxiv.org/format/2410.16090">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Analysing the Residual Stream of Language Models Under Knowledge Conflicts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaotang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongru Wang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+K">Kam-Fai Wong</a>, 
<a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16090v2-abstract-short" style="display: inline;"> Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. However, their parametric knowledge may conflict with the information provided in the context. Such conflicts can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. In this work, we investigate whether LLMs can identify knowledge conflicts and whether it is&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16090v2-abstract-full').style.display = 'inline'; document.getElementById('2410.16090v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16090v2-abstract-full" style="display: none;"> Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. However, their parametric knowledge may conflict with the information provided in the context. Such conflicts can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. In this work, we investigate whether LLMs can identify knowledge conflicts and whether it is possible to know which source of knowledge the model will rely on by analysing the residual stream of the LLM. Through probing tasks, we find that LLMs can internally register the signal of knowledge conflict in the residual stream, which can be accurately detected by probing the intermediate model activations. This allows us to detect conflicts within the residual stream before generating the answers without modifying the input or model parameters. 
Moreover, we find that the residual stream shows significantly different patterns when the model relies on contextual knowledge versus parametric knowledge to resolve conflicts. This pattern can be employed to estimate the behaviour of LLMs when conflict happens and prevent unexpected answers before producing the answers. Our analysis offers insights into how LLMs internally manage knowledge conflicts and provides a foundation for developing methods to control the knowledge selection processes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16090v2-abstract-full').style.display = 'none'; document.getElementById('2410.16090v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Foundation Model Interventions Workshop @ NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15999">arXiv:2410.15999</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15999">pdf</a>, <a href="https://arxiv.org/format/2410.15999">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Steering Knowledge Selection Behaviours in LLMs via SAE-Based Representation Engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaotang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongru Wang</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+K">Kam-Fai Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15999v3-abstract-short" style="display: inline;"> Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. 
However, their parametric knowledge may conflict with the information provided in the context -- this phenomenon, known as \emph{context-memory knowledge conflicts}, can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. Analysing the internal activations o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15999v3-abstract-full').style.display = 'inline'; document.getElementById('2410.15999v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15999v3-abstract-full" style="display: none;"> Large language models (LLMs) can store a significant amount of factual knowledge in their parameters. However, their parametric knowledge may conflict with the information provided in the context -- this phenomenon, known as \emph{context-memory knowledge conflicts}, can lead to undesirable model behaviour, such as reliance on outdated or incorrect information. Analysing the internal activations of LLMs, we find that they can internally register the signals of knowledge conflict at mid-layers. Such signals allow us to detect whether a knowledge conflict occurs and use \emph{inference-time} intervention strategies to resolve it. In this work, we propose \textsc{SpARE}, a \emph{training-free} representation engineering method that uses pre-trained sparse auto-encoders (SAEs) to control the knowledge selection behaviour of LLMs. \textsc{SpARE} identifies the functional features that control the knowledge selection behaviours and applies them to edit the internal activations of LLMs at inference time. Our experimental results show that \textsc{SpARE} can effectively control the usage of either knowledge source to resolve knowledge conflict in open-domain question-answering tasks, surpassing existing representation engineering methods ($+10\%$) as well as contrastive decoding methods ($+15\%$). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15999v3-abstract-full').style.display = 'none'; document.getElementById('2410.15999v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15438">arXiv:2410.15438</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15438">pdf</a>, <a href="https://arxiv.org/format/2410.15438">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unveiling and Consulting Core Experts in Retrieval-Augmented MoE-based LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+P">Ping Nie</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yiwen Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+H">Haojie Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhanqiu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruotian Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Gui%2C+T">Tao Gui</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+X">Xuanjing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15438v1-abstract-short" style="display: inline;"> Retrieval-Augmented Generation (RAG) significantly improved the ability of Large Language Models (LLMs) to solve knowledge-intensive tasks. While existing research seeks to enhance RAG performance by retrieving higher-quality documents or designing RAG-specific LLMs, the internal mechanisms within LLMs that contribute to the effectiveness of RAG systems remain underexplored. In this paper, we aim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15438v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15438v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15438v1-abstract-full" style="display: none;"> Retrieval-Augmented Generation (RAG) significantly improved the ability of Large Language Models (LLMs) to solve knowledge-intensive tasks. While existing research seeks to enhance RAG performance by retrieving higher-quality documents or designing RAG-specific LLMs, the internal mechanisms within LLMs that contribute to the effectiveness of RAG systems remain underexplored. In this paper, we aim to investigate these internal mechanisms within the popular Mixture-of-Expert (MoE)-based LLMs and demonstrate how to improve RAG by examining expert activations in these LLMs. Our controlled experiments reveal that several core groups of experts are primarily responsible for RAG-related behaviors. The activation of these core experts can signify the model&#39;s inclination towards external/internal knowledge and adjust its behavior. 
For instance, we identify core experts that can (1) indicate the sufficiency of the model&#39;s internal knowledge, (2) assess the quality of retrieved documents, and (3) enhance the model&#39;s ability to utilize context. Based on these findings, we propose several strategies to enhance RAG&#39;s efficiency and effectiveness through expert activation. Experimental results across various datasets and MoE-based LLMs show the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15438v1-abstract-full').style.display = 'none'; document.getElementById('2410.15438v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12537">arXiv:2410.12537</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12537">pdf</a>, <a href="https://arxiv.org/format/2410.12537">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Is Complex Query Answering Really Complex? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gregucci%2C+C">Cosimo Gregucci</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+B">Bo Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Hernandez%2C+D">Daniel Hernandez</a>, <a href="/search/cs?searchtype=author&amp;query=Loconte%2C+L">Lorenzo Loconte</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Staab%2C+S">Steffen Staab</a>, <a href="/search/cs?searchtype=author&amp;query=Vergari%2C+A">Antonio Vergari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12537v1-abstract-short" style="display: inline;"> Complex query answering (CQA) on knowledge graphs (KGs) is gaining momentum as a challenging reasoning task. In this paper, we show that the current benchmarks for CQA are not really complex, and the way they are built distorts our perception of progress in this field. For example, we find that in these benchmarks, most queries (up to 98% for some query types) can be reduced to simpler problems, e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12537v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12537v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12537v1-abstract-full" style="display: none;"> Complex query answering (CQA) on knowledge graphs (KGs) is gaining momentum as a challenging reasoning task. In this paper, we show that the current benchmarks for CQA are not really complex, and the way they are built distorts our perception of progress in this field. 
For example, we find that in these benchmarks, most queries (up to 98% for some query types) can be reduced to simpler problems, e.g., link prediction, where only one link needs to be predicted. The performance of state-of-the-art CQA models drops significantly when such models are evaluated on queries that cannot be reduced to easier types. Thus, we propose a set of more challenging benchmarks, composed of queries that require models to reason over multiple hops and better reflect the construction of real-world KGs. In a systematic empirical investigation, the new benchmarks show that current CQA methods leave much to be desired. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12537v1-abstract-full').style.display = 'none'; document.getElementById('2410.12537v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11900">arXiv:2410.11900</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11900">pdf</a>, <a href="https://arxiv.org/format/2410.11900">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> FLARE: Faithful Logic-Aided Reasoning and Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Arakelyan%2C+E">Erik Arakelyan</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Lewis%2C+P">Patrick Lewis</a>, <a href="/search/cs?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11900v4-abstract-short" style="display: inline;"> Modern Question Answering (QA) and Reasoning approaches based on Large Language Models (LLMs) commonly use prompting techniques, such as Chain-of-Thought (CoT), assuming the resulting generation will have a more granular exploration and reasoning over the question space and scope. 
However, such methods struggle with generating outputs that are faithful to the intermediate chain of reasoning produc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11900v4-abstract-full').style.display = 'inline'; document.getElementById('2410.11900v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11900v4-abstract-full" style="display: none;"> Modern Question Answering (QA) and Reasoning approaches based on Large Language Models (LLMs) commonly use prompting techniques, such as Chain-of-Thought (CoT), assuming the resulting generation will have a more granular exploration and reasoning over the question space and scope. However, such methods struggle with generating outputs that are faithful to the intermediate chain of reasoning produced by the model. On the other end of the spectrum, neuro-symbolic methods such as Faithful CoT (F-CoT) propose to combine LLMs with external symbolic solvers. While such approaches boast a high degree of faithfulness, they usually require a model trained for code generation and struggle with tasks that are ambiguous or hard to formalise strictly. We introduce $\textbf{F}$aithful $\textbf{L}$ogic-$\textbf{A}$ided $\textbf{R}$easoning and $\textbf{E}$xploration ($\textbf{FLARE}$), a novel interpretable approach for traversing the problem space using task decompositions. We use the LLM to plan a solution, soft-formalise the query into facts and predicates using a logic programming code and simulate that code execution using an exhaustive multi-hop search over the defined space. Our method allows us to compute the faithfulness of the reasoning process w.r.t. the generated code and analyse the steps of the multi-hop search without relying on external solvers. Our methods achieve SOTA results on $\mathbf{7}$ out of $\mathbf{9}$ diverse reasoning benchmarks. 
We also show that model faithfulness positively correlates with overall performance and further demonstrate that $\textbf{FLARE}$ allows pinpointing the decisive factors sufficient for and leading to the correct answer with optimal reasoning during the multi-hop search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11900v4-abstract-full').style.display = 'none'; document.getElementById('2410.11900v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07433">arXiv:2409.07433</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.07433">pdf</a>, <a href="https://arxiv.org/format/2409.07433">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Dot Product is All You Need: Bridging the Gap Between Item Recommendation and Link Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Malitesta%2C+D">Daniele Malitesta</a>, <a href="/search/cs?searchtype=author&amp;query=Mancino%2C+A+C+M">Alberto Carlo Maria Mancino</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Di+Noia%2C+T">Tommaso Di Noia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2409.07433v1-abstract-short" style="display: inline;"> Item recommendation (the task of predicting if a user may interact with new items from the catalogue in a recommendation system) and link prediction (the task of identifying missing links in a knowledge graph) have long been regarded as distinct problems. In this work, we show that the item recommendation problem can be seen as an instance of the link prediction problem, where entities in the grap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07433v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07433v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07433v1-abstract-full" style="display: none;"> Item recommendation (the task of predicting if a user may interact with new items from the catalogue in a recommendation system) and link prediction (the task of identifying missing links in a knowledge graph) have long been regarded as distinct problems. In this work, we show that the item recommendation problem can be seen as an instance of the link prediction problem, where entities in the graph represent users and items, and the task consists of predicting missing instances of the relation type &lt;&lt;interactsWith&gt;&gt;. In a preliminary attempt to demonstrate the assumption, we decide to test three popular factorisation-based link prediction models on the item recommendation task, showing that their predictive accuracy is competitive with ten state-of-the-art recommendation models. The purpose is to show how the former may be seamlessly and effectively applied to the recommendation task without any specific modification to their architectures. 
Finally, while beginning to unveil the key reasons behind the recommendation performance of the selected link prediction models, we explore different settings for their hyper-parameter values, paving the way for future directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07433v1-abstract-full').style.display = 'none'; document.getElementById('2409.07433v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08670">arXiv:2408.08670</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08670">pdf</a>, <a href="https://arxiv.org/format/2408.08670">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Layer Selection for Efficient Vision Transformer Fine-Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Alvetreti%2C+F">Federico Alvetreti</a>, <a href="/search/cs?searchtype=author&amp;query=Pomponi%2C+J">Jary Pomponi</a>, <a href="/search/cs?searchtype=author&amp;query=Di+Lorenzo%2C+P">Paolo Di Lorenzo</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Scardapane%2C+S">Simone Scardapane</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08670v1-abstract-short" style="display: inline;"> Recently, foundation models based on Vision Transformers (ViTs) have become widely available. However, their fine-tuning process is highly resource-intensive, and it hinders their adoption in several edge or low-energy applications. To this end, in this paper we introduce an efficient fine-tuning method for ViTs called $\textbf{ALaST}$ (&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08670v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08670v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08670v1-abstract-full" style="display: none;"> Recently, foundation models based on Vision Transformers (ViTs) have become widely available. However, their fine-tuning process is highly resource-intensive, and it hinders their adoption in several edge or low-energy applications. To this end, in this paper we introduce an efficient fine-tuning method for ViTs called $\textbf{ALaST}$ ($\textit{Adaptive Layer Selection Fine-Tuning for Vision Transformers}$) to speed up the fine-tuning process while reducing computational cost, memory load, and training time. Our approach is based on the observation that not all layers are equally critical during fine-tuning, and their importance varies depending on the current mini-batch. Therefore, at each fine-tuning step, we adaptively estimate the importance of all layers and we assign what we call ``compute budgets&#39;&#39; accordingly. Layers that were allocated lower budgets are either trained with a reduced number of input tokens or kept frozen. 
Freezing a layer reduces the computational cost and memory usage by preventing updates to its weights, while discarding tokens removes redundant data, speeding up processing and reducing memory requirements. We show that this adaptive compute allocation enables a nearly-optimal schedule for distributing computational resources across layers, resulting in substantial reductions in training time (up to 1.5x), FLOPs (up to 2x), and memory load (up to 2x) compared to traditional full fine-tuning approaches. Additionally, it can be successfully combined with other parameter-efficient fine-tuning methods, such as LoRA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08670v1-abstract-full').style.display = 'none'; document.getElementById('2408.08670v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15516">arXiv:2407.15516</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.15516">pdf</a>, <a href="https://arxiv.org/format/2407.15516">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Attention Is All You Need But You Don&#39;t Need All Of It For Inference of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tyukin%2C+G">Georgy Tyukin</a>, <a href="/search/cs?searchtype=author&amp;query=Dovonon%2C+G+J">Gbetondji J-S Dovonon</a>, <a href="/search/cs?searchtype=author&amp;query=Kaddour%2C+J">Jean Kaddour</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15516v1-abstract-short" style="display: inline;"> The inference demand for LLMs has skyrocketed in recent months, and serving models with low latencies remains challenging due to the quadratic input length complexity of the attention layers. In this work, we investigate the effect of dropping MLP and attention layers at inference time on the performance of Llama-v2 models. 
We find that dropping deeper attention layers only marginally decreases p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15516v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15516v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15516v1-abstract-full" style="display: none;"> The inference demand for LLMs has skyrocketed in recent months, and serving models with low latencies remains challenging due to the quadratic input length complexity of the attention layers. In this work, we investigate the effect of dropping MLP and attention layers at inference time on the performance of Llama-v2 models. We find that dropping deeper attention layers only marginally decreases performance but leads to the best speedups alongside dropping entire layers. For example, removing 33\% of attention layers in a 13B Llama2 model results in a 1.8\% drop in average performance over the OpenLLM benchmark. We also observe that skipping layers except the latter layers reduces performances for more layers skipped, except for skipping the attention layers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15516v1-abstract-full').style.display = 'none'; document.getElementById('2407.15516v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14425">arXiv:2406.14425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14425">pdf</a>, <a href="https://arxiv.org/format/2406.14425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SynDARin: Synthesising Datasets for Automated Reasoning in Low-Resource Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ghazaryan%2C+G">Gayane Ghazaryan</a>, <a href="/search/cs?searchtype=author&amp;query=Arakelyan%2C+E">Erik Arakelyan</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14425v3-abstract-short" style="display: inline;"> Question Answering (QA) datasets have been instrumental in developing and evaluating Large Language Model (LLM) capabilities. However, such datasets are scarce for languages other than English due to the cost and difficulties of collection and manual annotation. This means that producing novel models and measuring the performance of multilingual LLMs in low-resource languages is challenging. 
To mi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14425v3-abstract-full').style.display = 'inline'; document.getElementById('2406.14425v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14425v3-abstract-full" style="display: none;"> Question Answering (QA) datasets have been instrumental in developing and evaluating Large Language Model (LLM) capabilities. However, such datasets are scarce for languages other than English due to the cost and difficulties of collection and manual annotation. This means that producing novel models and measuring the performance of multilingual LLMs in low-resource languages is challenging. To mitigate this, we propose $\textbf{S}$yn$\textbf{DAR}$in, a method for generating and validating QA datasets for low-resource languages. We utilize parallel content mining to obtain $\textit{human-curated}$ paragraphs between English and the target language. We use the English data as context to $\textit{generate}$ synthetic multiple-choice (MC) question-answer pairs, which are automatically translated and further validated for quality. Combining these with their designated non-English $\textit{human-curated}$ paragraphs form the final QA dataset. The method allows to maintain the content quality, reduces the likelihood of factual errors, and circumvents the need for costly annotation. To test the method, we created a QA dataset with $1.2$K samples for the Armenian language. The human evaluation shows that $98\%$ of the generated English data maintains quality and diversity in the question types and topics, while the translation validation pipeline can filter out $\sim70\%$ of data with poor quality. We use the dataset to benchmark state-of-the-art LLMs, showing their inability to achieve human accuracy with some model performances closer to random chance. 
This shows that the generated dataset is non-trivial and can be used to evaluate reasoning capabilities in low-resource language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14425v3-abstract-full').style.display = 'none'; document.getElementById('2406.14425v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13229">arXiv:2406.13229</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.13229">pdf</a>, <a href="https://arxiv.org/format/2406.13229">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Probing the Emergence of Cross-lingual Alignment during LLM Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hetong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Ponti%2C+E+M">Edoardo M. 
Ponti</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13229v1-abstract-short" style="display: inline;"> Multilingual Large Language Models (LLMs) achieve remarkable levels of zero-shot cross-lingual transfer performance. We speculate that this is predicated on their ability to align languages without explicit supervision from parallel sentences. While representations of translationally equivalent sentences in different languages are known to be similar after convergence, however, it remains unclear&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13229v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13229v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13229v1-abstract-full" style="display: none;"> Multilingual Large Language Models (LLMs) achieve remarkable levels of zero-shot cross-lingual transfer performance. We speculate that this is predicated on their ability to align languages without explicit supervision from parallel sentences. While representations of translationally equivalent sentences in different languages are known to be similar after convergence, however, it remains unclear how such cross-lingual alignment emerges during pre-training of LLMs. Our study leverages intrinsic probing techniques, which identify which subsets of neurons encode linguistic features, to correlate the degree of cross-lingual neuron overlap with the zero-shot cross-lingual transfer performance for a given model. In particular, we rely on checkpoints of BLOOM, a multilingual autoregressive LLM, across different training steps and model scales. 
We observe a high correlation between neuron overlap and downstream performance, which supports our hypothesis on the conditions leading to effective cross-lingual transfer. Interestingly, we also detect a degradation of both implicit alignment and multilingual abilities in certain phases of the pre-training process, providing new insights into the multilingual pretraining dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13229v1-abstract-full').style.display = 'none'; document.getElementById('2406.13229v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of the Association for Computational Linguistics: ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11430">arXiv:2406.11430</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.11430">pdf</a>, <a href="https://arxiv.org/format/2406.11430">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Simple and Effective $L_2$ Norm-Based Strategy for KV Cache Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a 
href="/search/cs?searchtype=author&amp;query=Scardapane%2C+S">Simone Scardapane</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11430v4-abstract-short" style="display: inline;"> The deployment of large language models (LLMs) is often hindered by the extensive memory requirements of the Key-Value (KV) cache, especially as context lengths increase. Existing approaches to reduce the KV cache size involve either fine-tuning the model to learn a compression strategy or leveraging attention scores to reduce the sequence length. We analyse the attention distributions in decoder-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11430v4-abstract-full').style.display = 'inline'; document.getElementById('2406.11430v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11430v4-abstract-full" style="display: none;"> The deployment of large language models (LLMs) is often hindered by the extensive memory requirements of the Key-Value (KV) cache, especially as context lengths increase. Existing approaches to reduce the KV cache size involve either fine-tuning the model to learn a compression strategy or leveraging attention scores to reduce the sequence length. We analyse the attention distributions in decoder-only Transformers-based models and observe that attention allocation patterns stay consistent across most layers. Surprisingly, we find a clear correlation between the $L_2$ and the attention scores over cached KV pairs, where a low $L_2$ of a key embedding usually leads to a high attention score during decoding. 
This finding indicates that the influence of a KV pair is potentially determined by the key embedding itself before being queried. Based on this observation, we compress the KV cache based on the $L_2$ of key embeddings. Our experimental results show that this simple strategy can reduce the KV cache size by 50% on language modelling and needle-in-a-haystack tasks and 90% on passkey retrieval tasks without losing accuracy. Moreover, without relying on the attention scores, this approach remains compatible with FlashAttention, enabling broader applicability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11430v4-abstract-full').style.display = 'none'; document.getElementById('2406.11430v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is an extended version of a paper published in the proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP 2024); this version was presented at the 4th NeurIPS Workshop on Efficient Natural Language and Speech Processing (ENLSP-IV)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04127">arXiv:2406.04127</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.04127">pdf</a>, <a href="https://arxiv.org/format/2406.04127">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Are We Done with MMLU? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Leang%2C+J+O+J">Joshua Ong Jun Leang</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Mancino%2C+A+C+M">Alberto Carlo Maria Mancino</a>, <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+R">Rohit Saxena</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaotang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Madani%2C+M+R+G">Mohammad Reza Ghasemi Madani</a>, <a href="/search/cs?searchtype=author&amp;query=Barale%2C+C">Claire Barale</a>, <a href="/search/cs?searchtype=author&amp;query=McHardy%2C+R">Robert McHardy</a>, <a href="/search/cs?searchtype=author&amp;query=Harris%2C+J">Joshua Harris</a>, <a href="/search/cs?searchtype=author&amp;query=Kaddour%2C+J">Jean Kaddour</a>, <a href="/search/cs?searchtype=author&amp;query=van+Krieken%2C+E">Emile van Krieken</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04127v3-abstract-short" style="display: inline;"> Maybe not. We identify and analyse errors in the popular Massive Multitask Language Understanding (MMLU) benchmark. Even though MMLU is widely adopted, our analysis demonstrates numerous ground truth errors that obscure the true capabilities of LLMs. For example, we find that 57% of the analysed questions in the Virology subset contain errors. 
To address this issue, we introduce a comprehensive fr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04127v3-abstract-full').style.display = 'inline'; document.getElementById('2406.04127v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04127v3-abstract-full" style="display: none;"> Maybe not. We identify and analyse errors in the popular Massive Multitask Language Understanding (MMLU) benchmark. Even though MMLU is widely adopted, our analysis demonstrates numerous ground truth errors that obscure the true capabilities of LLMs. For example, we find that 57% of the analysed questions in the Virology subset contain errors. To address this issue, we introduce a comprehensive framework for identifying dataset errors using a novel error annotation protocol. Then, we create MMLU-Redux, which is a subset of 5,700 manually re-annotated questions across all 57 MMLU subjects. We estimate that 6.49% of MMLU questions contain errors. Using MMLU-Redux, we demonstrate significant discrepancies with the model performance metrics that were originally reported. Our results strongly advocate for revising MMLU&#39;s error-ridden questions to enhance its future utility and reliability as a benchmark. https://huggingface.co/datasets/edinburgh-dawg/mmlu-redux-2.0. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04127v3-abstract-full').style.display = 'none'; document.getElementById('2406.04127v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18028">arXiv:2405.18028</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.18028">pdf</a>, <a href="https://arxiv.org/format/2405.18028">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Edinburgh Clinical NLP at MEDIQA-CORR 2024: Guiding Large Language Models with Hints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chaeeun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Daines%2C+L">Luke Daines</a>, <a href="/search/cs?searchtype=author&amp;query=Simpson%2C+T+I">T. Ian Simpson</a>, <a href="/search/cs?searchtype=author&amp;query=Alex%2C+B">Beatrice Alex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18028v1-abstract-short" style="display: inline;"> The MEDIQA-CORR 2024 shared task aims to assess the ability of Large Language Models (LLMs) to identify and correct medical errors in clinical notes. In this study, we evaluate the capability of general LLMs, specifically GPT-3.5 and GPT-4, to identify and correct medical errors with multiple prompting strategies. 
Recognising the limitation of LLMs in generating accurate corrections only via promp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18028v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18028v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18028v1-abstract-full" style="display: none;"> The MEDIQA-CORR 2024 shared task aims to assess the ability of Large Language Models (LLMs) to identify and correct medical errors in clinical notes. In this study, we evaluate the capability of general LLMs, specifically GPT-3.5 and GPT-4, to identify and correct medical errors with multiple prompting strategies. Recognising the limitation of LLMs in generating accurate corrections only via prompting strategies, we propose incorporating error-span predictions from a smaller, fine-tuned model in two ways: 1) by presenting it as a hint in the prompt and 2) by framing it as multiple-choice questions from which the LLM can choose the best correction. We found that our proposed prompting strategies significantly improve the LLM&#39;s ability to generate corrections. Our best-performing solution with 8-shot + CoT + hints ranked sixth in the shared task leaderboard. Additionally, our comprehensive analyses show the impact of the location of the error sentence, the prompted role, and the position of the multiple-choice option on the accuracy of the LLM. This prompts further questions about the readiness of LLM to be implemented in real-world clinical settings. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18028v1-abstract-full').style.display = 'none'; document.getElementById('2405.18028v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.15984">arXiv:2405.15984</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.15984">pdf</a>, <a href="https://arxiv.org/format/2405.15984">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating and Safeguarding the Adversarial Robustness of Retrieval-Based In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+S">Simon Yu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jie He</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+J+Z">Jeff Z. Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.15984v4-abstract-short" style="display: inline;"> With the emergence of large language models, such as LLaMA and OpenAI GPT-3, In-Context Learning (ICL) gained significant attention due to its effectiveness and efficiency. 
However, ICL is very sensitive to the choice, order, and verbaliser used to encode the demonstrations in the prompt. Retrieval-Augmented ICL methods try to address this problem by leveraging retrievers to extract semantically r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15984v4-abstract-full').style.display = 'inline'; document.getElementById('2405.15984v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.15984v4-abstract-full" style="display: none;"> With the emergence of large language models, such as LLaMA and OpenAI GPT-3, In-Context Learning (ICL) gained significant attention due to its effectiveness and efficiency. However, ICL is very sensitive to the choice, order, and verbaliser used to encode the demonstrations in the prompt. Retrieval-Augmented ICL methods try to address this problem by leveraging retrievers to extract semantically related examples as demonstrations. While this approach yields more accurate results, its robustness against various types of adversarial attacks, including perturbations on test samples, demonstrations, and retrieved data, remains under-explored. Our study reveals that retrieval-augmented models can enhance robustness against test sample attacks, outperforming vanilla ICL with a 4.87% reduction in Attack Success Rate (ASR); however, they exhibit overconfidence in the demonstrations, leading to a 2% increase in ASR for demonstration attacks. Adversarial training can help improve the robustness of ICL methods to adversarial attacks; however, such a training scheme can be too costly in the context of LLMs. As an alternative, we introduce an effective training-free adversarial defence method, DARD, which enriches the example pool with those attacked samples. We show that DARD yields improvements in performance and robustness, achieving a 15% reduction in ASR over the baselines. 
Code and data are released to encourage further research: https://github.com/simonucl/adv-retreival-icl <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15984v4-abstract-full').style.display = 'none'; document.getElementById('2405.15984v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">COLM 2024, 31 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19597">arXiv:2404.19597</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.19597">pdf</a>, <a href="https://arxiv.org/format/2404.19597">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> TuBA: Cross-Lingual Transferability of Backdoor Attacks in LLMs with Instruction Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qiongkai Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Stenetorp%2C+P">Pontus 
Stenetorp</a>, <a href="/search/cs?searchtype=author&amp;query=Rubinstein%2C+B+I+P">Benjamin I. P. Rubinstein</a>, <a href="/search/cs?searchtype=author&amp;query=Cohn%2C+T">Trevor Cohn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.19597v2-abstract-short" style="display: inline;"> The implications of backdoor attacks on English-centric large language models (LLMs) have been widely examined - such attacks can be achieved by embedding malicious behaviors during training and activated under specific conditions that trigger malicious outputs. Despite the increasing support for multilingual capabilities in open-source and proprietary LLMs, the impact of backdoor attacks on these&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19597v2-abstract-full').style.display = 'inline'; document.getElementById('2404.19597v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.19597v2-abstract-full" style="display: none;"> The implications of backdoor attacks on English-centric large language models (LLMs) have been widely examined - such attacks can be achieved by embedding malicious behaviors during training and activated under specific conditions that trigger malicious outputs. Despite the increasing support for multilingual capabilities in open-source and proprietary LLMs, the impact of backdoor attacks on these systems remains largely under-explored. Our research focuses on cross-lingual backdoor attacks against multilingual LLMs, particularly investigating how poisoning the instruction-tuning data for one or two languages can affect the outputs for languages whose instruction-tuning data were not poisoned. 
Despite its simplicity, our empirical analysis reveals that our method exhibits remarkable efficacy in models like mT5 and GPT-4o, with high attack success rates, surpassing 90% in more than 7 out of 12 languages across various scenarios. Our findings also indicate that more powerful models show increased susceptibility to transferable cross-lingual backdoor attacks, which also applies to LLMs predominantly pre-trained on English data, such as Llama2, Llama3, and Gemma. Moreover, our experiments demonstrate 1) High Transferability: the backdoor mechanism operates successfully in cross-lingual response scenarios across 26 languages, achieving an average attack success rate of 99%, and 2) Robustness: the proposed attack remains effective even after defenses are applied. These findings expose critical security vulnerabilities in multilingual LLMs and highlight the urgent need for more robust, targeted defense strategies to address the unique challenges posed by cross-lingual backdoor transfer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19597v2-abstract-full').style.display = 'none'; document.getElementById('2404.19597v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16041">arXiv:2404.16041</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.16041">pdf</a>, <a href="https://arxiv.org/format/2404.16041">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Forklift: An Extensible Neural Lifter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Armengol-Estap%C3%A9%2C+J">Jordi Armengol-Estapé</a>, <a href="/search/cs?searchtype=author&amp;query=Rocha%2C+R+C+O">Rodrigo C. O. Rocha</a>, <a href="/search/cs?searchtype=author&amp;query=Woodruff%2C+J">Jackson Woodruff</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=O%27Boyle%2C+M+F+P">Michael F. P. O&#39;Boyle</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16041v1-abstract-short" style="display: inline;"> The escalating demand to migrate legacy software across different Instruction Set Architectures (ISAs) has driven the development of assembly-to-assembly translators to map between their respective assembly languages. However, the development of these tools requires substantial engineering effort. 
State-of-the-art approaches use lifting, a technique where source assembly code is translated to an a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16041v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16041v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16041v1-abstract-full" style="display: none;"> The escalating demand to migrate legacy software across different Instruction Set Architectures (ISAs) has driven the development of assembly-to-assembly translators to map between their respective assembly languages. However, the development of these tools requires substantial engineering effort. State-of-the-art approaches use lifting, a technique where source assembly code is translated to an architecture-independent intermediate representation (IR) (for example, the LLVM IR) and use a pre-existing compiler to recompile the IR to the target ISA. However, the hand-written rules these lifters employ are sensitive to the particular compiler and optimization level used to generate the code and require significant engineering effort to support each new ISA. We propose Forklift, the first neural lifter that learns how to translate assembly to LLVM IR using a token-level encoder-decoder Transformer. We show how to incrementally add support to new ISAs by fine tuning the assembly encoder and freezing the IR decoder, improving the overall accuracy and efficiency. We collect millions of parallel LLVM IR, x86, ARM, and RISC-V programs across compilers and optimization levels to train Forklift and set up an input/output-based accuracy harness. We evaluate Forklift on two challenging benchmark suites and translate 2.5x more x86 programs than a state-of-the-art hand-written lifter and 4.4x more x86 programs than GPT-4 as well as enabling translation from new ISAs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16041v1-abstract-full').style.display = 'none'; document.getElementById('2404.16041v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08458">arXiv:2404.08458</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.08458">pdf</a>, <a href="https://arxiv.org/format/2404.08458">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Independence Assumption in Neurosymbolic Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=van+Krieken%2C+E">Emile van Krieken</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Ponti%2C+E+M">Edoardo M. 
Ponti</a>, <a href="/search/cs?searchtype=author&amp;query=Vergari%2C+A">Antonio Vergari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08458v2-abstract-short" style="display: inline;"> State-of-the-art neurosymbolic learning systems use probabilistic reasoning to guide neural networks towards predictions that conform to logical constraints over symbols. Many such systems assume that the probabilities of the considered symbols are conditionally independent given the input to simplify learning and reasoning. We study and criticise this assumption, highlighting how it can hinder op&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08458v2-abstract-full').style.display = 'inline'; document.getElementById('2404.08458v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08458v2-abstract-full" style="display: none;"> State-of-the-art neurosymbolic learning systems use probabilistic reasoning to guide neural networks towards predictions that conform to logical constraints over symbols. Many such systems assume that the probabilities of the considered symbols are conditionally independent given the input to simplify learning and reasoning. We study and criticise this assumption, highlighting how it can hinder optimisation and prevent uncertainty quantification. We prove that loss functions bias conditionally independent neural networks to become overconfident in their predictions. As a result, they are unable to represent uncertainty over multiple valid options. Furthermore, we prove that these loss functions are difficult to optimise: they are non-convex, and their minima are usually highly disconnected. 
Our theoretical analysis gives the foundation for replacing the conditional independence assumption and designing more expressive neurosymbolic probabilistic models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08458v2-abstract-full').style.display = 'none'; document.getElementById('2404.08458v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05904">arXiv:2404.05904</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.05904">pdf</a>, <a href="https://arxiv.org/format/2404.05904">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> The Hallucinations Leaderboard -- An Open Effort to Measure Hallucinations in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+R">Rohit Saxena</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaotang Du</a>, <a href="/search/cs?searchtype=author&amp;query=Nie%2C+P">Ping Nie</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Perez-Beltrachini%2C+L">Laura Perez-Beltrachini</a>, <a href="/search/cs?searchtype=author&amp;query=Ryabinin%2C+M">Max Ryabinin</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Fourrier%2C+C">Clémentine Fourrier</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05904v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have transformed the Natural Language Processing (NLP) landscape with their remarkable ability to understand and generate human-like text. However, these models are prone to ``hallucinations&#39;&#39; -- outputs that do not align with factual reality or the input context. This paper introduces the Hallucinations Leaderboard, an open initiative to quantitatively measure and com&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05904v2-abstract-full').style.display = 'inline'; document.getElementById('2404.05904v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05904v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have transformed the Natural Language Processing (NLP) landscape with their remarkable ability to understand and generate human-like text. However, these models are prone to ``hallucinations&#39;&#39; -- outputs that do not align with factual reality or the input context. This paper introduces the Hallucinations Leaderboard, an open initiative to quantitatively measure and compare the tendency of each model to produce hallucinations. 
The leaderboard uses a comprehensive set of benchmarks focusing on different aspects of hallucinations, such as factuality and faithfulness, across various tasks, including question-answering, summarisation, and reading comprehension. Our analysis provides insights into the performance of different models, guiding researchers and practitioners in choosing the most reliable models for their applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05904v2-abstract-full').style.display = 'none'; document.getElementById('2404.05904v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00484">arXiv:2404.00484</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.00484">pdf</a>, <a href="https://arxiv.org/format/2404.00484">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Edinburgh Clinical NLP at SemEval-2024 Task 2: Fine-tune your model unless you have access to GPT-4 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+G">Giwon Hong</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Daines%2C+L">Luke Daines</a>, <a 
href="/search/cs?searchtype=author&amp;query=Alex%2C+B">Beatrice Alex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00484v1-abstract-short" style="display: inline;"> The NLI4CT task assesses Natural Language Inference systems in predicting whether hypotheses entail or contradict evidence from Clinical Trial Reports. In this study, we evaluate various Large Language Models (LLMs) with multiple strategies, including Chain-of-Thought, In-Context Learning, and Parameter-Efficient Fine-Tuning (PEFT). We propose a PEFT method to improve the consistency of LLMs by me&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00484v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00484v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00484v1-abstract-full" style="display: none;"> The NLI4CT task assesses Natural Language Inference systems in predicting whether hypotheses entail or contradict evidence from Clinical Trial Reports. In this study, we evaluate various Large Language Models (LLMs) with multiple strategies, including Chain-of-Thought, In-Context Learning, and Parameter-Efficient Fine-Tuning (PEFT). We propose a PEFT method to improve the consistency of LLMs by merging adapters that were fine-tuned separately using triplet and language modelling objectives. We found that merging the two PEFT adapters improves the F1 score (+0.0346) and consistency (+0.152) of the LLMs. However, our novel methods did not produce more accurate results than GPT-4 in terms of faithfulness and consistency. Averaging the three metrics, GPT-4 ranks joint-first in the competition with 0.8328. Finally, our contamination analysis with GPT-4 indicates that there was no test data leakage. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00484v1-abstract-full').style.display = 'none'; document.getElementById('2404.00484v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.20288">arXiv:2403.20288</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.20288">pdf</a>, <a href="https://arxiv.org/format/2403.20288">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can LLMs Correct Physicians, Yet? Investigating Effective Interaction Methods in the Medical Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sayin%2C+B">Burcu Sayin</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Staiano%2C+J">Jacopo Staiano</a>, <a href="/search/cs?searchtype=author&amp;query=Passerini%2C+A">Andrea Passerini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.20288v2-abstract-short" style="display: inline;"> We explore the potential of Large Language Models (LLMs) to assist and potentially correct physicians in medical decision-making tasks. 
We evaluate several LLMs, including Meditron, Llama2, and Mistral, to analyze the ability of these models to interact effectively with physicians across different scenarios. We consider questions from PubMedQA and several tasks, ranging from binary (yes/no) respon&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20288v2-abstract-full').style.display = 'inline'; document.getElementById('2403.20288v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.20288v2-abstract-full" style="display: none;"> We explore the potential of Large Language Models (LLMs) to assist and potentially correct physicians in medical decision-making tasks. We evaluate several LLMs, including Meditron, Llama2, and Mistral, to analyze the ability of these models to interact effectively with physicians across different scenarios. We consider questions from PubMedQA and several tasks, ranging from binary (yes/no) responses to long answer generation, where the answer of the model is produced after an interaction with a physician. Our findings suggest that prompt design significantly influences the downstream accuracy of LLMs and that LLMs can provide valuable feedback to physicians, challenging incorrect diagnoses and contributing to more accurate decision-making. For example, when the physician is accurate 38% of the time, Mistral can produce the correct answer, improving accuracy up to 74% depending on the prompt being used, while Llama2 and Meditron models exhibit greater sensitivity to prompt choice. Our analysis also uncovers the challenges of ensuring that LLM-generated suggestions are pertinent and useful, emphasizing the need for further research in this area. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20288v2-abstract-full').style.display = 'none'; document.getElementById('2403.20288v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for oral presentation at NAACL 2024, The 6th Clinical Natural Language Processing Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07965">arXiv:2403.07965</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.07965">pdf</a>, <a href="https://arxiv.org/format/2403.07965">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3233/IA-240035">10.3233/IA-240035 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Conditional computation in neural networks: principles and research trends </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Scardapane%2C+S">Simone Scardapane</a>, <a 
href="/search/cs?searchtype=author&amp;query=Baiocchi%2C+A">Alessandro Baiocchi</a>, <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Marsocci%2C+V">Valerio Marsocci</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Pomponi%2C+J">Jary Pomponi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07965v2-abstract-short" style="display: inline;"> This article summarizes principles and ideas from the emerging area of applying \textit{conditional computation} methods to the design of neural networks. In particular, we focus on neural networks that can dynamically activate or de-activate parts of their computational graph conditionally on their input. Examples include the dynamic selection of, e.g., input tokens, layers (or sets of layers), a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07965v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07965v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07965v2-abstract-full" style="display: none;"> This article summarizes principles and ideas from the emerging area of applying \textit{conditional computation} methods to the design of neural networks. In particular, we focus on neural networks that can dynamically activate or de-activate parts of their computational graph conditionally on their input. Examples include the dynamic selection of, e.g., input tokens, layers (or sets of layers), and sub-modules inside each layer (e.g., channels in a convolutional filter). We first provide a general formalism to describe these techniques in an uniform way. 
Then, we introduce three notable implementations of these principles: mixture-of-experts (MoEs) networks, token selection mechanisms, and early-exit neural networks. The paper aims to provide a tutorial-like introduction to this growing field. To this end, we analyze the benefits of these modular designs in terms of efficiency, explainability, and transfer learning, with a focus on emerging applicative areas ranging from automated scientific discovery to semantic communication. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07965v2-abstract-full').style.display = 'none'; document.getElementById('2403.07965v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Intelligenza Artificiale, vol. Pre-press, pp. 
1-16, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03230">arXiv:2403.03230</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03230">pdf</a>, <a href="https://arxiv.org/format/2403.03230">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41562-024-02046-9">10.1038/s41562-024-02046-9 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Large language models surpass human experts in predicting neuroscience results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaoliang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Rechardt%2C+A">Akilles Rechardt</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Nejad%2C+K+K">Kevin K. Nejad</a>, <a href="/search/cs?searchtype=author&amp;query=Y%C3%A1%C3%B1ez%2C+F">Felipe Yáñez</a>, <a href="/search/cs?searchtype=author&amp;query=Yilmaz%2C+B">Bati Yilmaz</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+K">Kangjoo Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+A+O">Alexandra O.
Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Borghesani%2C+V">Valentina Borghesani</a>, <a href="/search/cs?searchtype=author&amp;query=Pashkov%2C+A">Anton Pashkov</a>, <a href="/search/cs?searchtype=author&amp;query=Marinazzo%2C+D">Daniele Marinazzo</a>, <a href="/search/cs?searchtype=author&amp;query=Nicholas%2C+J">Jonathan Nicholas</a>, <a href="/search/cs?searchtype=author&amp;query=Salatiello%2C+A">Alessandro Salatiello</a>, <a href="/search/cs?searchtype=author&amp;query=Sucholutsky%2C+I">Ilia Sucholutsky</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Razavi%2C+S">Sepehr Razavi</a>, <a href="/search/cs?searchtype=author&amp;query=Rocca%2C+R">Roberta Rocca</a>, <a href="/search/cs?searchtype=author&amp;query=Yusifov%2C+E">Elkhan Yusifov</a>, <a href="/search/cs?searchtype=author&amp;query=Okalova%2C+T">Tereza Okalova</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+N">Nianlong Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Ferianc%2C+M">Martin Ferianc</a>, <a href="/search/cs?searchtype=author&amp;query=Khona%2C+M">Mikail Khona</a>, <a href="/search/cs?searchtype=author&amp;query=Patil%2C+K+R">Kaustubh R. Patil</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+P">Pui-Shee Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Mata%2C+R">Rui Mata</a> , et al. (14 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03230v4-abstract-short" style="display: inline;"> Scientific discoveries often hinge on synthesizing decades of research, a task that potentially outstrips human information processing capacities. Large language models (LLMs) offer a solution. 
LLMs trained on the vast scientific literature could potentially integrate noisy yet interrelated findings to forecast novel results better than human experts. To evaluate this possibility, we created Brain&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03230v4-abstract-full').style.display = 'inline'; document.getElementById('2403.03230v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03230v4-abstract-full" style="display: none;"> Scientific discoveries often hinge on synthesizing decades of research, a task that potentially outstrips human information processing capacities. Large language models (LLMs) offer a solution. LLMs trained on the vast scientific literature could potentially integrate noisy yet interrelated findings to forecast novel results better than human experts. To evaluate this possibility, we created BrainBench, a forward-looking benchmark for predicting neuroscience results. We find that LLMs surpass experts in predicting experimental outcomes. BrainGPT, an LLM we tuned on the neuroscience literature, performed better yet. Like human experts, when LLMs were confident in their predictions, they were more likely to be correct, which presages a future where humans and LLMs team together to make discoveries. Our approach is not neuroscience-specific and is transferable to other knowledge-intensive endeavors. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03230v4-abstract-full').style.display = 'none'; document.getElementById('2403.03230v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The latest version of this paper has been published at Nature Human Behaviour, please see https://www.nature.com/articles/s41562-024-02046-9</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01461">arXiv:2403.01461</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01461">pdf</a>, <a href="https://arxiv.org/format/2403.01461">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Answerability in Retrieval-Augmented Open-Domain Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Abdumalikov%2C+R">Rustam Abdumalikov</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Kementchedjhieva%2C+Y">Yova Kementchedjhieva</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01461v1-abstract-short" style="display: inline;"> The 
performance of Open-Domain Question Answering (ODQA) retrieval systems can exhibit sub-optimal behavior, providing text excerpts with varying degrees of irrelevance. Unfortunately, many existing ODQA datasets lack examples specifically targeting the identification of irrelevant text excerpts. Previous attempts to address this gap have relied on a simplistic approach of pairing questions with r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01461v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01461v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01461v1-abstract-full" style="display: none;"> The performance of Open-Domain Question Answering (ODQA) retrieval systems can exhibit sub-optimal behavior, providing text excerpts with varying degrees of irrelevance. Unfortunately, many existing ODQA datasets lack examples specifically targeting the identification of irrelevant text excerpts. Previous attempts to address this gap have relied on a simplistic approach of pairing questions with random text excerpts. This paper aims to investigate the effectiveness of models trained using this randomized strategy, uncovering an important limitation in their ability to generalize to irrelevant text excerpts with high semantic overlap. As a result, we observed a substantial decrease in predictive accuracy, from 98% to 1%. To address this limitation, we discovered an efficient approach for training models to recognize such excerpts. By leveraging unanswerable pairs from the SQuAD 2.0 dataset, our models achieve a nearly perfect (~100%) accuracy when confronted with these challenging text excerpts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01461v1-abstract-full').style.display = 'none'; document.getElementById('2403.01461v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17389">arXiv:2402.17389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.17389">pdf</a>, <a href="https://arxiv.org/format/2402.17389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FairBelief -- Assessing Harmful Beliefs in Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Setzu%2C+M">Mattia Setzu</a>, <a href="/search/cs?searchtype=author&amp;query=Manerba%2C+M+M">Marta Marchiori Manerba</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Nozza%2C+D">Debora Nozza</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17389v1-abstract-short" style="display: inline;"> Language Models (LMs) have been shown to inherit undesired biases that 
might hurt minorities and underrepresented groups if such systems were integrated into real-world applications without careful fairness auditing. This paper proposes FairBelief, an analytical approach to capture and assess beliefs, i.e., propositions that an LM may embed with different degrees of confidence and that covertly in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17389v1-abstract-full').style.display = 'inline'; document.getElementById('2402.17389v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17389v1-abstract-full" style="display: none;"> Language Models (LMs) have been shown to inherit undesired biases that might hurt minorities and underrepresented groups if such systems were integrated into real-world applications without careful fairness auditing. This paper proposes FairBelief, an analytical approach to capture and assess beliefs, i.e., propositions that an LM may embed with different degrees of confidence and that covertly influence its predictions. With FairBelief, we leverage prompting to study the behavior of several state-of-the-art LMs across different previously neglected axes, such as model scale and likelihood, assessing predictions on a fairness dataset specifically designed to quantify LMs&#39; outputs&#39; hurtfulness. Finally, we conclude with an in-depth qualitative assessment of the beliefs emitted by the models. We apply FairBelief to English LMs, revealing that, although these architectures enable high performances on diverse natural language processing tasks, they show hurtful beliefs about specific genders. Interestingly, training procedure and dataset, model scale, and architecture induce beliefs of different degrees of hurtfulness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17389v1-abstract-full').style.display = 'none'; document.getElementById('2402.17389v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13991">arXiv:2402.13991</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.13991">pdf</a>, <a href="https://arxiv.org/format/2402.13991">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2024.acl-long.427">10.18653/v1/2024.acl-long.427 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Analysing The Impact of Sequence Composition on Language Model Pre-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+Y">Yuanbin Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Staniszewski%2C+K">Konrad Staniszewski</a>, <a href="/search/cs?searchtype=author&amp;query=Tworkowski%2C+S">Szymon Tworkowski</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Wei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mi%C5%82o%C5%9B%2C+P">Piotr Miłoś</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuxiang
Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13991v1-abstract-short" style="display: inline;"> Most language model pre-training frameworks concatenate multiple documents into fixed-length sequences and use causal masking to compute the likelihood of each token given its context; this strategy is widely adopted due to its simplicity and efficiency. However, to this day, the influence of the pre-training sequence composition strategy on the generalisation properties of the model remains under&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13991v1-abstract-full').style.display = 'inline'; document.getElementById('2402.13991v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13991v1-abstract-full" style="display: none;"> Most language model pre-training frameworks concatenate multiple documents into fixed-length sequences and use causal masking to compute the likelihood of each token given its context; this strategy is widely adopted due to its simplicity and efficiency. However, to this day, the influence of the pre-training sequence composition strategy on the generalisation properties of the model remains under-explored. In this work, we find that applying causal masking can lead to the inclusion of distracting information from previous documents during pre-training, which negatively impacts the performance of the models on language modelling and downstream tasks. In intra-document causal masking, the likelihood of each token is only conditioned on the previous tokens in the same document, eliminating potential distracting information from previous documents and significantly improving performance. 
Furthermore, we find that concatenating related documents can reduce some potential distractions during pre-training, and our proposed efficient retrieval-based sequence construction method, BM25Chunk, can improve in-context learning (+11.6\%), knowledge memorisation (+9.8\%), and context utilisation (+7.2\%) abilities of language models without sacrificing efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13991v1-abstract-full').style.display = 'none'; document.getElementById('2402.13991v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Analysing The Impact of Sequence Composition on Language Model Pre-Training (Zhao et al., ACL 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10193">arXiv:2312.10193</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.10193">pdf</a>, <a href="https://arxiv.org/format/2312.10193">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Computation Modules: Granular Conditional Computation For Efficient Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=W%C3%B3jcik%2C+B">Bartosz Wójcik</a>, <a href="/search/cs?searchtype=author&amp;query=Devoto%2C+A">Alessio Devoto</a>, <a href="/search/cs?searchtype=author&amp;query=Pustelnik%2C+K">Karol Pustelnik</a>, <a
href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Scardapane%2C+S">Simone Scardapane</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10193v2-abstract-short" style="display: inline;"> While transformer models have been highly successful, they are computationally inefficient. We observe that for each layer, the full width of the layer may be needed only for a small subset of tokens inside a batch and that the &#34;effective&#34; width needed to process a token can vary from layer to layer. Motivated by this observation, we introduce the Adaptive Computation Module (ACM), a generic modul&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10193v2-abstract-full').style.display = 'inline'; document.getElementById('2312.10193v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10193v2-abstract-full" style="display: none;"> While transformer models have been highly successful, they are computationally inefficient. We observe that for each layer, the full width of the layer may be needed only for a small subset of tokens inside a batch and that the &#34;effective&#34; width needed to process a token can vary from layer to layer. Motivated by this observation, we introduce the Adaptive Computation Module (ACM), a generic module that dynamically adapts its computational load to match the estimated difficulty of the input on a per-token basis. An ACM consists of a sequence of learners that progressively refine the output of their preceding counterparts. An additional gating mechanism determines the optimal number of learners to execute for each token. 
We also propose a distillation technique to replace any pre-trained model with an &#34;ACMized&#34; variant. Our evaluation of transformer models in computer vision and speech recognition demonstrates that substituting layers with ACMs significantly reduces inference costs without degrading the downstream accuracy for a wide interval of user-defined budgets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10193v2-abstract-full').style.display = 'none'; document.getElementById('2312.10193v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07556">arXiv:2311.07556</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.07556">pdf</a>, <a href="https://arxiv.org/format/2311.07556">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Using Natural Language Explanations to Improve Robustness of In-context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuxiang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Camburu%2C+O">Oana-Maria Camburu</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Stenetorp%2C+P">Pontus Stenetorp</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.07556v2-abstract-short" style="display: inline;"> Recent studies demonstrated that large language models (LLMs) can excel in many tasks via in-context learning (ICL). However, recent works show that ICL-prompted models tend to produce inaccurate results when presented with adversarial inputs. In this work, we investigate whether augmenting ICL with natural language explanations (NLEs) improves the robustness of LLMs on adversarial datasets coveri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07556v2-abstract-full').style.display = 'inline'; document.getElementById('2311.07556v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07556v2-abstract-full" style="display: none;"> Recent studies demonstrated that large language models (LLMs) can excel in many tasks via in-context learning (ICL). However, recent works show that ICL-prompted models tend to produce inaccurate results when presented with adversarial inputs. In this work, we investigate whether augmenting ICL with natural language explanations (NLEs) improves the robustness of LLMs on adversarial datasets covering natural language inference and paraphrasing identification. We prompt LLMs with a small set of human-generated NLEs to produce further NLEs, yielding more accurate results than both a zero-shot-ICL setting and using only human-generated NLEs. Our results on five popular LLMs (GPT3.5-turbo, Llama2, Vicuna, Zephyr, and Mistral) show that our approach yields over 6% improvement over baseline approaches for eight adversarial datasets: HANS, ISCS, NaN, ST, PICD, PISP, ANLI, and PAWS. Furthermore, previous studies have demonstrated that prompt selection strategies significantly enhance ICL on in-distribution test sets. 
However, our findings reveal that these strategies do not match the efficacy of our approach for robustness evaluations, resulting in an accuracy drop of 8% compared to the proposed approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07556v2-abstract-full').style.display = 'none'; document.getElementById('2311.07556v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted to ACL2024 (main)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14418">arXiv:2310.14418</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.14418">pdf</a>, <a href="https://arxiv.org/format/2310.14418">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> REFER: An End-to-end Rationale Extraction Framework for Explanation Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Madani%2C+M+R+G">Mohammad Reza Ghasemi Madani</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14418v1-abstract-short" style="display: inline;"> 
Human-annotated textual explanations are becoming increasingly important in Explainable Natural Language Processing. Rationale extraction aims to provide faithful (i.e., reflective of the behavior of the model) and plausible (i.e., convincing to humans) explanations by highlighting the inputs that had the largest impact on the prediction without compromising the performance of the task model. In r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14418v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14418v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14418v1-abstract-full" style="display: none;"> Human-annotated textual explanations are becoming increasingly important in Explainable Natural Language Processing. Rationale extraction aims to provide faithful (i.e., reflective of the behavior of the model) and plausible (i.e., convincing to humans) explanations by highlighting the inputs that had the largest impact on the prediction without compromising the performance of the task model. In recent works, the focus of training rationale extractors was primarily on optimizing for plausibility using human highlights, while the task model was trained on jointly optimizing for task predictive accuracy and faithfulness. We propose REFER, a framework that employs a differentiable rationale extractor that allows to back-propagate through the rationale extraction process. We analyze the impact of using human highlights during training by jointly training the task model and the rationale extractor. In our experiments, REFER yields significantly better results in terms of faithfulness, plausibility, and downstream task accuracy on both in-distribution and out-of-distribution data. 
On both e-SNLI and CoS-E, our best setting produces better results in terms of composite normalized relative gain than the previous baselines by 11% and 3%, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14418v1-abstract-full').style.display = 'none'; document.getElementById('2310.14418v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.09045">arXiv:2309.09045</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.09045">pdf</a>, <a href="https://arxiv.org/format/2309.09045">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Temporal Smoothness Regularisers for Neural Link Predictors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dileo%2C+M">Manuel Dileo</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Zignani%2C+M">Matteo Zignani</a>, <a href="/search/cs?searchtype=author&amp;query=Gaito%2C+S">Sabrina Gaito</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.09045v2-abstract-short" style="display: inline;"> Most algorithms for representation learning and link prediction on relational data are designed for static data. 
However, the data to which they are applied typically evolves over time, including online social networks or interactions between users and items in recommender systems. This is also the case for graph-structured knowledge bases -- knowledge graphs -- which contain facts that are valid&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09045v2-abstract-full').style.display = 'inline'; document.getElementById('2309.09045v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.09045v2-abstract-full" style="display: none;"> Most algorithms for representation learning and link prediction on relational data are designed for static data. However, the data to which they are applied typically evolves over time, including online social networks or interactions between users and items in recommender systems. This is also the case for graph-structured knowledge bases -- knowledge graphs -- which contain facts that are valid only for specific points in time. In such contexts, it becomes crucial to correctly identify missing links at a precise time point, i.e. the temporal prediction link task. Recently, Lacroix et al. and Sadeghian et al. proposed a solution to the problem of link prediction for knowledge graphs under temporal constraints inspired by the canonical decomposition of 4-order tensors, where they regularise the representations of time steps by enforcing temporal smoothing, i.e. by learning similar transformation for adjacent timestamps. However, the impact of the choice of temporal regularisation terms is still poorly understood. In this work, we systematically analyse several choices of temporal smoothing regularisers using linear functions and recurrent architectures. 
In our experiments, we show that by carefully selecting the temporal smoothing regulariser and regularisation weight, a simple method like TNTComplEx can produce significantly more accurate results than state-of-the-art methods on three widely used temporal link prediction datasets. Furthermore, we evaluate the impact of a wide range of temporal smoothing regularisers on two state-of-the-art temporal link prediction models. Our work shows that simple tensor factorisation models can produce new state-of-the-art results using newly proposed temporal regularisers, highlighting a promising avenue for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09045v2-abstract-full').style.display = 'none'; document.getElementById('2309.09045v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06585">arXiv:2308.06585</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.06585">pdf</a>, <a href="https://arxiv.org/format/2308.06585">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Approximate Answering of Graph Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cochez%2C+M">Michael Cochez</a>, <a href="/search/cs?searchtype=author&amp;query=Alivanistos%2C+D">Dimitrios Alivanistos</a>, <a href="/search/cs?searchtype=author&amp;query=Arakelyan%2C+E">Erik Arakelyan</a>, <a href="/search/cs?searchtype=author&amp;query=Berrendorf%2C+M">Max Berrendorf</a>, <a href="/search/cs?searchtype=author&amp;query=Daza%2C+D">Daniel Daza</a>, <a href="/search/cs?searchtype=author&amp;query=Galkin%2C+M">Mikhail Galkin</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Niepert%2C+M">Mathias Niepert</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+H">Hongyu Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06585v1-abstract-short" style="display: 
inline;"> Knowledge graphs (KGs) are inherently incomplete because of incomplete world knowledge and bias in what is the input to the KG. Additionally, world knowledge constantly expands and evolves, making existing facts deprecated or introducing new ones. However, we would still want to be able to answer queries as if the graph were complete. In this chapter, we will give an overview of several methods wh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06585v1-abstract-full').style.display = 'inline'; document.getElementById('2308.06585v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06585v1-abstract-full" style="display: none;"> Knowledge graphs (KGs) are inherently incomplete because of incomplete world knowledge and bias in what is the input to the KG. Additionally, world knowledge constantly expands and evolves, making existing facts deprecated or introducing new ones. However, we would still want to be able to answer queries as if the graph were complete. In this chapter, we will give an overview of several methods which have been proposed to answer queries in such a setting. We will first provide an overview of the different query types which can be supported by these methods and datasets typically used for evaluation, as well as an insight into their limitations. Then, we give an overview of the different approaches and describe them in terms of expressiveness, supported graph types, and inference capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06585v1-abstract-full').style.display = 'none'; document.getElementById('2308.06585v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint of Ch. 17 &#34;Approximate Answering of Graph Queries&#34; in &#34;Compendium of Neurosymbolic Artificial Intelligence&#34;, https://ebooks.iospress.nl/ISBN/978-1-64368-406-2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.06440">arXiv:2307.06440</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.06440">pdf</a>, <a href="https://arxiv.org/format/2307.06440">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> No Train No Gain: Revisiting Efficient Training Algorithms For Transformer-based Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kaddour%2C+J">Jean Kaddour</a>, <a 
href="/search/cs?searchtype=author&amp;query=Key%2C+O">Oscar Key</a>, <a href="/search/cs?searchtype=author&amp;query=Nawrot%2C+P">Piotr Nawrot</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Kusner%2C+M+J">Matt J. Kusner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.06440v4-abstract-short" style="display: inline;"> The computation necessary for training Transformer-based language models has skyrocketed in recent years. This trend has motivated research on efficient training algorithms designed to improve training, validation, and downstream performance faster than standard training. In this work, we revisit three categories of such algorithms: dynamic architectures (layer stacking, layer dropping), batch sel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06440v4-abstract-full').style.display = 'inline'; document.getElementById('2307.06440v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.06440v4-abstract-full" style="display: none;"> The computation necessary for training Transformer-based language models has skyrocketed in recent years. This trend has motivated research on efficient training algorithms designed to improve training, validation, and downstream performance faster than standard training. In this work, we revisit three categories of such algorithms: dynamic architectures (layer stacking, layer dropping), batch selection (selective backprop, RHO loss), and efficient optimizers (Lion, Sophia). When pre-training BERT and T5 with a fixed computation budget using such methods, we find that their training, validation, and downstream gains vanish compared to a baseline with a fully-decayed learning rate. 
We define an evaluation protocol that enables computation to be done on arbitrary machines by mapping all computation time to a reference machine which we call reference system time. We discuss the limitations of our proposed protocol and release our code to encourage rigorous research in efficient training procedures: https://github.com/JeanKaddour/NoTrainNoGain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.06440v4-abstract-full').style.display = 'none'; document.getElementById('2307.06440v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.03042">arXiv:2307.03042</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.03042">pdf</a>, <a href="https://arxiv.org/format/2307.03042">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Parameter-Efficient Fine-Tuning of LLaMA for the Clinical Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, 
<a href="/search/cs?searchtype=author&amp;query=Daines%2C+L">Luke Daines</a>, <a href="/search/cs?searchtype=author&amp;query=Hope%2C+T">Tom Hope</a>, <a href="/search/cs?searchtype=author&amp;query=Alex%2C+B">Beatrice Alex</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.03042v3-abstract-short" style="display: inline;"> Adapting pretrained language models to novel domains, such as clinical applications, traditionally involves retraining their entire set of parameters. Parameter-Efficient Fine-Tuning (PEFT) techniques for fine-tuning language models significantly reduce computational requirements by selectively fine-tuning small subsets of parameters. In this study, we propose a two-step PEFT framework and evaluat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03042v3-abstract-full').style.display = 'inline'; document.getElementById('2307.03042v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.03042v3-abstract-full" style="display: none;"> Adapting pretrained language models to novel domains, such as clinical applications, traditionally involves retraining their entire set of parameters. Parameter-Efficient Fine-Tuning (PEFT) techniques for fine-tuning language models significantly reduce computational requirements by selectively fine-tuning small subsets of parameters. In this study, we propose a two-step PEFT framework and evaluate it in the clinical domain. Our approach combines a specialised PEFT adapter layer designed for clinical domain adaptation with another adapter specialised for downstream tasks. We evaluate the framework on multiple clinical outcome prediction datasets, comparing it to clinically trained language models. 
Our framework achieves a better AUROC score averaged across all clinical downstream tasks compared to clinical language models. In particular, we observe large improvements of 4-5% AUROC in large-scale multilabel classification tasks, such as diagnoses and procedures classification. To our knowledge, this study is the first to provide an extensive empirical analysis of the interplay between PEFT techniques and domain adaptation in an important real-world domain of clinical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03042v3-abstract-full').style.display = 'none'; document.getElementById('2307.03042v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.19979">arXiv:2305.19979</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.19979">pdf</a>, <a href="https://arxiv.org/format/2305.19979">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Graph Embeddings in the Biomedical Domain: Are They Useful? 
A Look at Link Prediction, Rule Learning, and Downstream Polypharmacy Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&amp;query=Grabarczyk%2C+D">Dominik Grabarczyk</a>, <a href="/search/cs?searchtype=author&amp;query=De+Wulf%2C+W">Wolf De Wulf</a>, <a href="/search/cs?searchtype=author&amp;query=Borole%2C+P">Piyush Borole</a>, <a href="/search/cs?searchtype=author&amp;query=Alfaro%2C+J+A">Javier Antonio Alfaro</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Vergari%2C+A">Antonio Vergari</a>, <a href="/search/cs?searchtype=author&amp;query=Rajan%2C+A">Ajitha Rajan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.19979v2-abstract-short" style="display: inline;"> Knowledge graphs are powerful tools for representing and organising complex biomedical data. Several knowledge graph embedding algorithms have been proposed to learn from and complete knowledge graphs. However, a recent study demonstrates the limited efficacy of these embedding algorithms when applied to biomedical knowledge graphs, raising the question of whether knowledge graph embeddings have l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19979v2-abstract-full').style.display = 'inline'; document.getElementById('2305.19979v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.19979v2-abstract-full" style="display: none;"> Knowledge graphs are powerful tools for representing and organising complex biomedical data. 
Several knowledge graph embedding algorithms have been proposed to learn from and complete knowledge graphs. However, a recent study demonstrates the limited efficacy of these embedding algorithms when applied to biomedical knowledge graphs, raising the question of whether knowledge graph embeddings have limitations in biomedical settings. This study aims to apply state-of-the-art knowledge graph embedding models in the context of a recent biomedical knowledge graph, BioKG, and evaluate their performance and potential downstream uses. We achieve a three-fold improvement in terms of performance based on the HITS@10 score over previous work on the same biomedical knowledge graph. Additionally, we provide interpretable predictions through a rule-based method. We demonstrate that knowledge graph embedding models are applicable in practice by evaluating the best-performing model on four tasks that represent real-life polypharmacy situations. Results suggest that knowledge learnt from large biomedical knowledge graphs can be transferred to such downstream use cases. Our code is available at https://github.com/aryopg/biokge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19979v2-abstract-full').style.display = 'none'; document.getElementById('2305.19979v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13235">arXiv:2305.13235</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13235">pdf</a>, <a href="https://arxiv.org/format/2305.13235">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SPARSEFIT: Few-shot Prompting with Sparse Fine-tuning for Jointly Generating Predictions and Natural Language Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Solano%2C+J">Jesus Solano</a>, <a href="/search/cs?searchtype=author&amp;query=Sanni%2C+M">Mardhiyah Sanni</a>, <a href="/search/cs?searchtype=author&amp;query=Camburu%2C+O">Oana-Maria Camburu</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13235v3-abstract-short" style="display: inline;"> Models that generate natural language explanations (NLEs) for their predictions have recently gained increasing interest. However, this approach usually demands large datasets of human-written NLEs for the ground-truth answers at training time, which can be expensive and potentially infeasible for some applications. 
When only a few NLEs are available (a few-shot setup), fine-tuning pre-trained lan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13235v3-abstract-full').style.display = 'inline'; document.getElementById('2305.13235v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13235v3-abstract-full" style="display: none;"> Models that generate natural language explanations (NLEs) for their predictions have recently gained increasing interest. However, this approach usually demands large datasets of human-written NLEs for the ground-truth answers at training time, which can be expensive and potentially infeasible for some applications. When only a few NLEs are available (a few-shot setup), fine-tuning pre-trained language models (PLMs) in conjunction with prompt-based learning has recently shown promising results. However, PLMs typically have billions of parameters, making full fine-tuning expensive. We propose SparseFit, a sparse few-shot fine-tuning strategy that leverages discrete prompts to jointly generate predictions and NLEs. We experiment with SparseFit on three sizes of the T5 language model and four datasets and compare it against existing state-of-the-art Parameter-Efficient Fine-Tuning (PEFT) techniques. We find that fine-tuning only 6.8% of the model parameters leads to competitive results for both the task performance and the quality of the generated NLEs compared to full fine-tuning of the model and produces better results on average than other PEFT methods in terms of predictive accuracy and NLE quality. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13235v3-abstract-full').style.display = 'none'; document.getElementById('2305.13235v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACL 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13214">arXiv:2305.13214</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13214">pdf</a>, <a href="https://arxiv.org/format/2305.13214">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Atomic Inference for NLI with Generated Facts as Atoms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Stacey%2C+J">Joe Stacey</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Dubossarsky%2C+H">Haim Dubossarsky</a>, <a href="/search/cs?searchtype=author&amp;query=Camburu%2C+O">Oana-Maria Camburu</a>, <a href="/search/cs?searchtype=author&amp;query=Rei%2C+M">Marek Rei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13214v2-abstract-short" style="display: inline;"> With recent advances, neural models can achieve human-level performance 
on various natural language tasks. However, there are no guarantees that any explanations from these models are faithful, i.e. that they reflect the inner workings of the model. Atomic inference overcomes this issue, providing interpretable and faithful model decisions. This approach involves making predictions for different c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13214v2-abstract-full').style.display = 'inline'; document.getElementById('2305.13214v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13214v2-abstract-full" style="display: none;"> With recent advances, neural models can achieve human-level performance on various natural language tasks. However, there are no guarantees that any explanations from these models are faithful, i.e. that they reflect the inner workings of the model. Atomic inference overcomes this issue, providing interpretable and faithful model decisions. This approach involves making predictions for different components (or atoms) of an instance, before using interpretable and deterministic rules to derive the overall prediction based on the individual atom-level predictions. We investigate the effectiveness of using LLM-generated facts as atoms, decomposing Natural Language Inference premises into lists of facts. While directly using generated facts in atomic inference systems can result in worse performance, with 1) a multi-stage fact generation process, and 2) a training regime that incorporates the facts, our fact-based method outperforms other approaches. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13214v2-abstract-full').style.display = 'none'; document.getElementById('2305.13214v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.12313">arXiv:2301.12313</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.12313">pdf</a>, <a href="https://arxiv.org/format/2301.12313">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Adapting Neural Link Predictors for Data-Efficient Complex Query Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Arakelyan%2C+E">Erik Arakelyan</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a 
href="/search/cs?searchtype=author&amp;query=Daza%2C+D">Daniel Daza</a>, <a href="/search/cs?searchtype=author&amp;query=Cochez%2C+M">Michael Cochez</a>, <a href="/search/cs?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.12313v3-abstract-short" style="display: inline;"> Answering complex queries on incomplete knowledge graphs is a challenging task where a model needs to answer complex logical queries in the presence of missing knowledge. Prior work in the literature has proposed to address this problem by designing architectures trained end-to-end for the complex query answering task with a reasoning process that is hard to interpret while requiring data and reso&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12313v3-abstract-full').style.display = 'inline'; document.getElementById('2301.12313v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.12313v3-abstract-full" style="display: none;"> Answering complex queries on incomplete knowledge graphs is a challenging task where a model needs to answer complex logical queries in the presence of missing knowledge. Prior work in the literature has proposed to address this problem by designing architectures trained end-to-end for the complex query answering task with a reasoning process that is hard to interpret while requiring data and resource-intensive training. Other lines of research have proposed re-using simple neural link predictors to answer complex queries, reducing the amount of training data by orders of magnitude while providing interpretable answers. 
The neural link predictor used in such approaches is not explicitly optimised for the complex query answering task, implying that its scores are not calibrated to interact together. We propose to address these problems via CQD$^{\mathcal{A}}$, a parameter-efficient score \emph{adaptation} model optimised to re-calibrate neural link prediction scores for the complex query answering task. While the neural link predictor is frozen, the adaptation component -- which only increases the number of model parameters by $0.03\%$ -- is trained on the downstream complex query answering task. Furthermore, the calibration component enables us to support reasoning over queries that include atomic negations, which was previously impossible with link predictors. In our experiments, CQD$^{\mathcal{A}}$ produces significantly more accurate results than current state-of-the-art methods, improving from $34.4$ to $35.1$ Mean Reciprocal Rank values averaged across all datasets and query types while using $\leq 30\%$ of the available training query types. We further show that CQD$^{\mathcal{A}}$ is data-efficient, achieving competitive results with only $1\%$ of the training complex queries, and robust in out-of-domain evaluations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12313v3-abstract-full').style.display = 'none'; document.getElementById('2301.12313v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.09856">arXiv:2211.09856</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.09856">pdf</a>, <a href="https://arxiv.org/format/2211.09856">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Machine Learning-Assisted Recurrence Prediction for Early-Stage Non-Small-Cell Lung Cancer Patients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Janik%2C+A">Adrianna Janik</a>, <a href="/search/cs?searchtype=author&amp;query=Torrente%2C+M">Maria Torrente</a>, <a href="/search/cs?searchtype=author&amp;query=Costabello%2C+L">Luca Costabello</a>, <a href="/search/cs?searchtype=author&amp;query=Calvo%2C+V">Virginia Calvo</a>, <a href="/search/cs?searchtype=author&amp;query=Walsh%2C+B">Brian Walsh</a>, <a href="/search/cs?searchtype=author&amp;query=Camps%2C+C">Carlos Camps</a>, <a href="/search/cs?searchtype=author&amp;query=Mohamed%2C+S+K">Sameh K. Mohamed</a>, <a href="/search/cs?searchtype=author&amp;query=Ortega%2C+A+L">Ana L. Ortega</a>, <a href="/search/cs?searchtype=author&amp;query=Nov%C3%A1%C4%8Dek%2C+V">Vít Nováček</a>, <a href="/search/cs?searchtype=author&amp;query=Massut%C3%AD%2C+B">Bartomeu Massutí</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Campelo%2C+M+R+G">M. 
Rosario Garcia Campelo</a>, <a href="/search/cs?searchtype=author&amp;query=del+Barco%2C+E">Edel del Barco</a>, <a href="/search/cs?searchtype=author&amp;query=Bosch-Barrera%2C+J">Joaquim Bosch-Barrera</a>, <a href="/search/cs?searchtype=author&amp;query=Menasalvas%2C+E">Ernestina Menasalvas</a>, <a href="/search/cs?searchtype=author&amp;query=Timilsina%2C+M">Mohan Timilsina</a>, <a href="/search/cs?searchtype=author&amp;query=Provencio%2C+M">Mariano Provencio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.09856v1-abstract-short" style="display: inline;"> Background: Stratifying cancer patients according to risk of relapse can personalize their care. In this work, we provide an answer to the following research question: How to utilize machine learning to estimate probability of relapse in early-stage non-small-cell lung cancer patients? Methods: For predicting relapse in 1,387 early-stage (I-II), non-small-cell lung cancer (NSCLC) patients from t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.09856v1-abstract-full').style.display = 'inline'; document.getElementById('2211.09856v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.09856v1-abstract-full" style="display: none;"> Background: Stratifying cancer patients according to risk of relapse can personalize their care. In this work, we provide an answer to the following research question: How to utilize machine learning to estimate probability of relapse in early-stage non-small-cell lung cancer patients? Methods: For predicting relapse in 1,387 early-stage (I-II), non-small-cell lung cancer (NSCLC) patients from the Spanish Lung Cancer Group data (65.7 average age, 24.8% females, 75.2% males) we train tabular and graph machine learning models. 
We generate automatic explanations for the predictions of such models. For models trained on tabular data, we adopt SHAP local explanations to gauge how each patient feature contributes to the predicted outcome. We explain graph machine learning predictions with an example-based method that highlights influential past patients. Results: Machine learning models trained on tabular data exhibit a 76% accuracy for the Random Forest model at predicting relapse evaluated with a 10-fold cross-validation (model was trained 10 times with different independent sets of patients in test, train and validation sets, the reported metrics are averaged over these 10 test sets). Graph machine learning reaches 68% accuracy over a 200-patient, held-out test set, calibrated on a held-out set of 100 patients. Conclusions: Our results show that machine learning models trained on tabular and graph data can enable objective, personalised and reproducible prediction of relapse and therefore, disease outcome in patients with early-stage NSCLC. With further prospective and multisite validation, and additional radiological and molecular data, this prognostic model could potentially serve as a predictive decision support tool for deciding the use of adjuvant treatments in early-stage lung cancer. Keywords: Non-Small-Cell Lung Cancer, Tumor Recurrence Prediction, Machine Learning <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.09856v1-abstract-full').style.display = 'none'; document.getElementById('2211.09856v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.16773">arXiv:2210.16773</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.16773">pdf</a>, <a href="https://arxiv.org/format/2210.16773">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Memory-Augmented Transformer for Knowledge-Intensive NLP Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuxiang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+B">Baotian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Stenetorp%2C+P">Pontus Stenetorp</a>, <a href="/search/cs?searchtype=author&amp;query=Riedel%2C+S">Sebastian Riedel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.16773v1-abstract-short" style="display: inline;"> Access to external knowledge is essential for many natural language processing tasks, such as question answering and dialogue. Existing methods often rely on a parametric model that stores knowledge in its parameters, or use a retrieval-augmented model that has access to an external knowledge source. 
Parametric and retrieval-augmented models have complementary strengths in terms of computational e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16773v1-abstract-full').style.display = 'inline'; document.getElementById('2210.16773v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.16773v1-abstract-full" style="display: none;"> Access to external knowledge is essential for many natural language processing tasks, such as question answering and dialogue. Existing methods often rely on a parametric model that stores knowledge in its parameters, or use a retrieval-augmented model that has access to an external knowledge source. Parametric and retrieval-augmented models have complementary strengths in terms of computational efficiency and predictive accuracy. To combine the strength of both approaches, we propose the Efficient Memory-Augmented Transformer (EMAT) -- it encodes external knowledge into a key-value memory and exploits the fast maximum inner product search for memory querying. We also introduce pre-training tasks that allow EMAT to encode informative key-value representations, and to learn an implicit strategy to integrate multiple memory slots into the transformer. Experiments on various knowledge-intensive tasks such as question answering and dialogue datasets show that, simply augmenting parametric models (T5-base) using our method produces more accurate results (e.g., 25.8 -&gt; 44.3 EM on NQ) while retaining a high throughput (e.g., 1000 queries/s on NQ). Compared to retrieval-augmented models, EMAT runs substantially faster across the board and produces more accurate results on WoW and ELI5. Our code and datasets are available at https://github.com/uclnlp/EMAT. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.16773v1-abstract-full').style.display = 'none'; document.getElementById('2210.16773v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022 main conference long paper. 8 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.15353">arXiv:2210.15353</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.15353">pdf</a>, <a href="https://arxiv.org/format/2210.15353">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Learning Discrete Directed Acyclic Graphs via Backpropagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wren%2C+A+J">Andrew J. 
Wren</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Franceschi%2C+L">Luca Franceschi</a>, <a href="/search/cs?searchtype=author&amp;query=Zantedeschi%2C+V">Valentina Zantedeschi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.15353v1-abstract-short" style="display: inline;"> Recently continuous relaxations have been proposed in order to learn Directed Acyclic Graphs (DAGs) from data by backpropagation, instead of using combinatorial optimization. However, a number of techniques for fully discrete backpropagation could instead be applied. In this paper, we explore that direction and propose DAG-DB, a framework for learning DAGs by Discrete Backpropagation. Based on the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.15353v1-abstract-full').style.display = 'inline'; document.getElementById('2210.15353v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.15353v1-abstract-full" style="display: none;"> Recently continuous relaxations have been proposed in order to learn Directed Acyclic Graphs (DAGs) from data by backpropagation, instead of using combinatorial optimization. However, a number of techniques for fully discrete backpropagation could instead be applied. In this paper, we explore that direction and propose DAG-DB, a framework for learning DAGs by Discrete Backpropagation. Based on the architecture of Implicit Maximum Likelihood Estimation [I-MLE, arXiv:2106.01798], DAG-DB adopts a probabilistic approach to the problem, sampling binary adjacency matrices from an implicit probability distribution. 
DAG-DB learns a parameter for the distribution from the loss incurred by each sample, performing competitively using either of two fully discrete backpropagation techniques, namely I-MLE and Straight-Through Estimation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.15353v1-abstract-full').style.display = 'none'; document.getElementById('2210.15353v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 2 figures, 7 tables. Accepted for NeurIPS 2022 workshops on: Causal Machine Learning for Real-World Impact; and Neuro Causal and Symbolic AI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.04862">arXiv:2209.04862</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.04862">pdf</a>, <a href="https://arxiv.org/format/2209.04862">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Perturbation-Based Gradient Estimation for Discrete Latent Variable Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Franceschi%2C+L">Luca Franceschi</a>, <a href="/search/cs?searchtype=author&amp;query=Niepert%2C+M">Mathias Niepert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.04862v2-abstract-short" style="display: inline;"> The integration of discrete algorithmic components in deep learning architectures has numerous applications. Recently, Implicit Maximum Likelihood Estimation (IMLE, Niepert, Minervini, and Franceschi 2021), a class of gradient estimators for discrete exponential family distributions, was proposed by combining implicit differentiation through perturbation with the path-wise gradient estimator. Howe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.04862v2-abstract-full').style.display = 'inline'; document.getElementById('2209.04862v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.04862v2-abstract-full" style="display: none;"> The integration of discrete algorithmic components in deep learning architectures has numerous applications. Recently, Implicit Maximum Likelihood Estimation (IMLE, Niepert, Minervini, and Franceschi 2021), a class of gradient estimators for discrete exponential family distributions, was proposed by combining implicit differentiation through perturbation with the path-wise gradient estimator. However, due to the finite difference approximation of the gradients, it is especially sensitive to the choice of the finite difference step size, which needs to be specified by the user. 
In this work, we present Adaptive IMLE (AIMLE), the first adaptive gradient estimator for complex discrete distributions: it adaptively identifies the target distribution for IMLE by trading off the density of gradient information with the degree of bias in the gradient estimates. We empirically evaluate our estimator on synthetic examples, as well as on Learning to Explain, Discrete Variational Auto-Encoders, and Neural Relational Inference tasks. In our experiments, we show that our adaptive gradient estimator can produce faithful estimates while requiring orders of magnitude fewer samples than other gradient estimators. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.04862v2-abstract-full').style.display = 'none'; document.getElementById('2209.04862v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the Thirty-Seventh AAAI Conference on Artificial Intelligence (AAAI 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.09980">arXiv:2207.09980</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.09980">pdf</a>, <a href="https://arxiv.org/format/2207.09980">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ReFactor GNNs: Revisiting Factorisation-based Models from a Message-Passing Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yihong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+P">Pushkar Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Franceschi%2C+L">Luca Franceschi</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Stenetorp%2C+P">Pontus Stenetorp</a>, <a href="/search/cs?searchtype=author&amp;query=Riedel%2C+S">Sebastian Riedel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.09980v4-abstract-short" style="display: inline;"> Factorisation-based Models (FMs), such as DistMult, have enjoyed enduring success for Knowledge Graph Completion (KGC) tasks, often outperforming Graph Neural 
Networks (GNNs). However, unlike GNNs, FMs struggle to incorporate node features and generalise to unseen nodes in inductive settings. Our work bridges the gap between FMs and GNNs by proposing ReFactor GNNs. This new architecture draws upon&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09980v4-abstract-full').style.display = 'inline'; document.getElementById('2207.09980v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.09980v4-abstract-full" style="display: none;"> Factorisation-based Models (FMs), such as DistMult, have enjoyed enduring success for Knowledge Graph Completion (KGC) tasks, often outperforming Graph Neural Networks (GNNs). However, unlike GNNs, FMs struggle to incorporate node features and generalise to unseen nodes in inductive settings. Our work bridges the gap between FMs and GNNs by proposing ReFactor GNNs. This new architecture draws upon both modelling paradigms, which previously were largely thought of as disjoint. Concretely, using a message-passing formalism, we show how FMs can be cast as GNNs by reformulating the gradient descent procedure as message-passing operations, which forms the basis of our ReFactor GNNs. Across a multitude of well-established KGC benchmarks, our ReFactor GNNs achieve comparable transductive performance to FMs, and state-of-the-art inductive performance while using an order of magnitude fewer parameters. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09980v4-abstract-full').style.display = 'none'; document.getElementById('2207.09980v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36th Conference on Neural Information Processing Systems (NeurIPS 2022)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T05; 68T07; 68T50 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7; I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.11432">arXiv:2205.11432</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.11432">pdf</a>, <a href="https://arxiv.org/format/2205.11432">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Logical Reasoning with Span-Level Predictions for Interpretable and Robust NLI Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Stacey%2C+J">Joe Stacey</a>, <a href="/search/cs?searchtype=author&amp;query=Minervini%2C+P">Pasquale Minervini</a>, <a href="/search/cs?searchtype=author&amp;query=Dubossarsky%2C+H">Haim 
Dubossarsky</a>, <a href="/search/cs?searchtype=author&amp;query=Rei%2C+M">Marek Rei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.11432v3-abstract-short" style="display: inline;"> Current Natural Language Inference (NLI) models achieve impressive results, sometimes outperforming humans when evaluating on in-distribution test sets. However, as these models are known to learn from annotation artefacts and dataset biases, it is unclear to what extent the models are learning the task of NLI instead of learning from shallow heuristics in their training data. We address this issu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11432v3-abstract-full').style.display = 'inline'; document.getElementById('2205.11432v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.11432v3-abstract-full" style="display: none;"> Current Natural Language Inference (NLI) models achieve impressive results, sometimes outperforming humans when evaluating on in-distribution test sets. However, as these models are known to learn from annotation artefacts and dataset biases, it is unclear to what extent the models are learning the task of NLI instead of learning from shallow heuristics in their training data. We address this issue by introducing a logical reasoning framework for NLI, creating highly transparent model decisions that are based on logical rules. Unlike prior work, we show that improved interpretability can be achieved without decreasing the predictive accuracy. We almost fully retain performance on SNLI, while also identifying the exact hypothesis spans that are responsible for each model prediction. 
Using the e-SNLI human explanations, we verify that our model makes sensible decisions at a span level, despite not using any span labels during training. We can further improve model performance and span-level decisions by using the e-SNLI explanations during training. Finally, our model is more robust in a reduced data setting. When training with only 1,000 examples, out-of-distribution performance improves on the MNLI matched and mismatched validation sets by 13% and 16% relative to the baseline. Training with fewer observations yields further improvements, both in-distribution and out-of-distribution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11432v3-abstract-full').style.display = 'none'; document.getElementById('2205.11432v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2022</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Minervini%2C+P&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 
64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 
17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10